SDG14

Published

February 7, 2023

Import packages and set things up

Code
import numpy as np
import pandas as pd
import os
import csv
import composite as ci
import inspect
from functools import reduce
import plotly.express as px
from sklearn.preprocessing import RobustScaler
import plotly
import re
from scipy.stats import pearsonr
import plotly.graph_objects as go
import datetime as dt
from scipy import stats
from plotly.subplots import make_subplots


# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
from IPython.display import display, HTML

# register plotly's offline JS bundle with the notebook frontend
plotly.offline.init_notebook_mode()
# inject MathJax manually so LaTeX inside plotly figures renders in VS Code
display(
    HTML(
        '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
    )
)

# For plotly figures to show on printed version
plotly.offline.init_notebook_mode()
Code
# show all output from cells
from IPython.core.interactiveshell import InteractiveShell

# "all" echoes every expression statement in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"  # last_expr
# show all columns
pd.set_option("display.max_columns", None)
Code
# create output folder if not there
# exist_ok=True is idempotent and avoids the check-then-create race of the
# previous `if not os.path.exists(...)` guard
os.makedirs("../output", exist_ok=True)

Country names dict

Code
# load countries dictionary
# squeeze() collapses the single value column into a Series so that
# to_dict() yields {country name: abbreviation} (see countryAbb below)
country_to_abbrev = (
    pd.read_csv("../data/countryAbbr.csv", header=None, index_col=0).squeeze().to_dict()
)
# invert the dictionary
abbrev_to_country = dict(map(reversed, country_to_abbrev.items()))
# add additional abbreviations
# EL and GB are the Eurostat codes used for Greece and the United Kingdom
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"

allEurope = pd.read_csv("../data/Countries-Europe.csv")
allEurope = allEurope.name.to_list()

# fmt: off
# countries included in the assessment
countries = [
    'Belgium','Germany','Denmark','Estonia','Spain','Finland','France',
'Ireland','Lithuania','Latvia','Netherlands','Poland','Portugal','Sweden',
'United Kingdom'
]
# countries used to calculate some variable targets (EU + UK)
countriesEU = [
    "Belgium", "Bulgaria", "Cyprus", "Greece", "Germany", "Croatia",
    "Italy", "Denmark", "Estonia", "Spain", "Finland", "France",
    "Ireland", "Lithuania", "Latvia", "Malta", "Netherlands", "Poland",
    "Portugal", "Romania", "Sweden", "United Kingdom",]
# abbreviations for the assessed countries; names without a known
# abbreviation are kept as-is
countryAbb = [
    x if x not in country_to_abbrev else country_to_abbrev[x] for x in countries
]
# fmt: on
# fmt: on

Define general functions

Code
def missingCountries(df, countries=countries):
    """Print which of *countries* are absent from df's index (or that none are)."""
    present = df.index.unique()
    absent = [c for c in countries if c not in present]
    if absent:
        print("Missing countries:\n", "\n".join(absent))
    else:
        print("No missing countries")


def negativeValues(ds):
    """Replace negative entries of *ds* with 0 in place, reporting if any were found."""
    has_negative = ds[ds < 0].count().sum() > 0
    if not has_negative:
        return
    # mutate the caller's object directly, mirroring the reporting check above
    ds[ds < 0] = 0
    print("Negative values in dataset replaced with 0")


def calculate_target(df, valueCol, countryCol):
    """
    Calculate target (max/min) for each country based on max/min of 3 top countries after
    calculating max/min for each country after dropping outliers using Interquartile Range (IQR) proximity rule
    df: dataframe
    valueCol: column name of value
    countryCol: column name of country
    Returns (avgMax3, avgMin3): mean of the 3 largest per-country maxima and
    mean of the 3 smallest per-country minima, computed on the outlier-free data.
    """
    # we drop outliers using Interquartile Range (IQR) proximity rule
    quartile_1, quartile_3 = np.percentile(df[valueCol], [25, 75])
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - 1.5 * iqr
    upper_bound = quartile_3 + 1.5 * iqr
    # BUG FIX: the negation must cover BOTH tails. The original
    # `~(x > upper_bound) | (x < lower_bound)` negated only the first
    # comparison, so low outliers were kept and skewed avgMin3.
    outlier_mask = (df[valueCol] < lower_bound) | (df[valueCol] > upper_bound)
    dfOutliers = df[~outlier_mask]
    # target is max/min of 3 top countries after calculating max/min for each country
    avgMax3 = (
        dfOutliers[valueCol].groupby(dfOutliers[countryCol]).max().nlargest(3).mean()
    )
    avgMin3 = (
        dfOutliers[valueCol].groupby(dfOutliers[countryCol]).min().nsmallest(3).mean()
    )
    return avgMax3, avgMin3

SDG Official Data

SDG14 metadata can be found here

Code
# SDG14 indicators from the UNstats
# https://unstats.un.org/sdgs/dataportal/database

sdg14 = pd.read_excel("../data/Goal14_april2023.xlsx", sheet_name=0)
# fmt: off
sdg14 = sdg14[
    [
        "Target", "Indicator", "SeriesCode", "GeoAreaName",
        "TimePeriod", "Value", "Units", "SeriesDescription"
    ]
]
# fmt: on

# "N" marks missing observations in the UN export.
# Assign back to the column instead of the previous chained
# `sdg14.Value.replace(..., inplace=True)`, which is deprecated and
# unreliable when applied through an attribute-accessed column view.
sdg14["Value"] = sdg14["Value"].replace("N", np.nan).astype(np.float64)

# filter countries
# sdg14 = sdg14[sdg14['GeoAreaName'].isin(countries)]
# show indicators
pd.DataFrame(
    sdg14.groupby(["Indicator", "SeriesCode", "SeriesDescription", "Units"]).size()
)
0
Indicator SeriesCode SeriesDescription Units
14.1.1 EN_MAR_BEALITSQ Beach litter per square kilometer (Number) NUMBER 611
EN_MAR_BEALIT_BP Beach litter originating from national land-based sources that ends in the beach (%) PERCENT 900
EN_MAR_BEALIT_BV Beach litter originating from national land-based sources that ends in the beach (Tonnes) TONNES 900
EN_MAR_BEALIT_EXP Exported beach litter originating from national land-based sources (Tonnes) TONNES 900
EN_MAR_BEALIT_OP Beach litter originating from national land-based sources that ends in the ocean (%) PERCENT 900
EN_MAR_BEALIT_OV Beach litter originating from national land-based sources that ends in the ocean (Tonnes) TONNES 900
EN_MAR_CHLANM Chlorophyll-a anomaly, remote sensing (%) PERCENT 2784
EN_MAR_CHLDEV Chlorophyll-a deviations, remote sensing (%) PERCENT 4131
EN_MAR_PLASDD Floating plastic debris density (count per km2) NUMBER 3
14.2.1 EN_SCP_ECSYBA Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO) NUMBER 33
14.3.1 ER_OAW_MNACD Average marine acidity (pH) measured at agreed suite of representative sampling stations PH 903
14.4.1 ER_H2O_FWTL Proportion of fish stocks within biologically sustainable levels (not overexploited) (%) PERCENT 422
14.5.1 ER_MRN_MPA Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%) PERCENT 6049
14.6.1 ER_REG_UNFCIM Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.7.1 EN_SCP_FSHGDP Sustainable fisheries as a proportion of GDP PERCENT 5880
14.a.1 ER_RDE_OSEX National ocean science expenditure as a share of total research and development funding (%) PERCENT 159
14.b.1 ER_REG_SSFRAR Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.c.1 ER_UNCLOS_IMPLE Score for the implementation of UNCLOS and its two implementing agreements (%) PERCENT 63
ER_UNCLOS_RATACC Score for the ratification of and accession to UNCLOS and its two implementing agreements (%) PERCENT 63

Check official data by indicator

Code
# check sdg indicator, change SeriesCode to the one you want to check
# Example with Average marine acidity (pH) ER_OAW_MNACD
# collapse any GeoAreaName containing both "United" and "Kingdom"
# (the UN uses the long official name) to the short form used here
sdg14.loc[
    sdg14["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)"), "GeoAreaName"
] = "United Kingdom"
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
# one row per country, one column per year; mean aggregates duplicate records
sdg14check = sdg14check[sdg14check["SeriesCode"] == "ER_OAW_MNACD"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# display the three most recent years only
mr = sdg14check.columns[-3:].to_list()
sdg14check[mr]
missingCountries(sdg14check)
TimePeriod 2019 2020 2021
GeoAreaName
Belgium 7.794000 8.139600 7.8710
Estonia 7.783000 7.916000 7.7255
Finland 8.223464 NaN NaN
France 8.023000 NaN NaN
Netherlands 7.904056 NaN NaN
Spain 8.036000 NaN NaN
Sweden 8.082261 8.117167 NaN
United Kingdom 7.988000 NaN NaN
Missing countries:
 Germany
Denmark
Ireland
Lithuania
Latvia
Poland
Portugal

14.1

(a) Index of coastal eutrophication

We use: 1. Gross Nutrient Balance. Target defined by top 3 countries 2. Marine waters affected by eutrophication. Target defined by top 3 countries

Gross nutrient balance (kg/ha), Eurostat

Source

Code
# Gross nutrient balance (BAL_UAA, kg/ha), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/AEI_PR_GNB__custom_153613/

nutrient = pd.read_csv("../data/aei_pr_gnb__custom_7309259_linear.csv")
# map 2-letter Eurostat codes to full names; unknown codes are kept as-is
nutrient["geo"] = nutrient["geo"].map(abbrev_to_country).fillna(nutrient["geo"])
years = range(2012, nutrient.TIME_PERIOD.max() + 1)
# We use all countries in the EU to calculate the target value.
nutrient = nutrient[(nutrient["geo"].isin(countriesEU)) & (nutrient["TIME_PERIOD"].isin(years))]
# Countries can have nutrient deficiency (negative value) indicating declining soil fertility.
# NOTE(review): this mutates the OBS_VALUE column Series in place inside
# negativeValues; under pandas copy-on-write the change may not propagate
# back into `nutrient` — confirm with the pandas version in use.
negativeValues(nutrient.OBS_VALUE)
nitro = nutrient[nutrient["nutrient"] == "N"].copy()
phospho = nutrient[nutrient["nutrient"] == "P"].copy()
# For eutrophication, the less nutrient balance the better.
def nutrientTarget(df):
    """Score a nutrient-balance table against the best-3-countries target.

    Adds a 0-100 "target" column to *df* and returns it pivoted to
    one row per country ("geo") and one column per year ("TIME_PERIOD").
    """
    # zeros would make the top-3 target meaningless, so treat them as missing
    best = min(calculate_target(df.replace(0, np.nan), "OBS_VALUE", "geo"))
    scores = (best / df.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
    df["target"] = scores
    pivoted = df.pivot_table(
        columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
    )
    return pivoted
nitro = nutrientTarget(nitro)
phospho = nutrientTarget(phospho)
# display years: start, midpoint, and most recent available
years = [2012, 2016, nutrient.TIME_PERIOD.max()]
nutrientDict = {"nitro": nitro, "phospho": phospho}
for k, v in nutrientDict.items():
    print("Indicator values for ", k)
    v[years][v.index.isin(countries)]
    missingCountries(v)
Negative values in dataset replaced with 0
Indicator values for  nitro
No missing countries
Indicator values for  phospho
No missing countries
TIME_PERIOD 2012 2016 2018
geo
Belgium 9.829956 NaN NaN
Denmark 16.866507 NaN NaN
Estonia 50.238095 NaN NaN
Finland 29.614035 29.676512 24.252874
France 59.858156 29.614035 34.477124
Germany 18.730581 20.371711 18.057338
Ireland 40.075973 26.001232 NaN
Latvia 58.611111 55.380577 53.282828
Lithuania 48.173516 NaN 32.337165
Netherlands 8.274510 7.232219 7.176871
Poland 29.244629 31.897203 22.761597
Portugal 32.042521 30.121342 30.915751
Spain 41.865079 36.068376 NaN
Sweden 45.085470 38.328792 23.212321
United Kingdom 16.076190 16.262042 NaN
TIME_PERIOD 2012 2016 2018
geo
Belgium 3.333333 NaN NaN
Denmark 2.857143 NaN NaN
Estonia 100.000000 NaN NaN
Finland 5.405405 5.555556 3.448276
France 100.000000 6.896552 9.523810
Germany 100.000000 100.000000 100.000000
Ireland 1.104972 0.921659 NaN
Latvia 20.000000 28.571429 5.405405
Lithuania 2.857143 NaN 50.000000
Netherlands 6.250000 4.255319 2.898551
Poland 6.666667 25.000000 5.000000
Portugal 4.651163 3.571429 3.508772
Spain 4.444444 2.898551 NaN
Sweden 100.000000 100.000000 4.878049
United Kingdom 3.333333 3.333333 NaN
Marine waters affected by eutrophication

Source

Code
# Marine waters affected by eutrophication (km2 and % EEZ), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/sdg_14_60/default/table?lang=en

eutro = pd.read_csv("../data/sdg_14_60_linear.csv")
eutro["geo"] = eutro["geo"].map(abbrev_to_country).fillna(eutro["geo"])
years = range(2012, eutro.TIME_PERIOD.max() + 1)
# keep the percentage-of-EEZ unit; switch to KM2 for absolute area
eutro = eutro[
    eutro["geo"].isin(countriesEU)
    & (eutro["unit"] == "PC")
    & (eutro["TIME_PERIOD"].isin(years))
]  # KM2 for area
# The less eutrophication the better. We ignore 0 values to calculate meaningful target
target = min(calculate_target(eutro[eutro.OBS_VALUE != 0], "OBS_VALUE", "geo"))
# 0-100 score: target/value, with division-by-zero mapped to a full score
eutro["target"] = (
    (target / eutro.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, eutro.TIME_PERIOD.max()]
eutro = eutro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
eutro[years][eutro.index.isin(countries)]
missingCountries(eutro)
TIME_PERIOD 2012 2016 2022
geo
Belgium 100.000000 100.000000 100.000000
Denmark 100.000000 50.000000 0.409836
Estonia 3.703704 5.555556 100.000000
Finland 3.703704 2.702703 0.781250
France 100.000000 33.333333 2.631579
Germany 100.000000 100.000000 50.000000
Ireland 100.000000 100.000000 16.666667
Latvia 11.111111 7.692308 100.000000
Lithuania 50.000000 5.000000 100.000000
Netherlands 100.000000 100.000000 5.555556
Poland 25.000000 10.000000 100.000000
Portugal 1.923077 3.571429 0.523560
Spain 5.882353 5.555556 1.666667
Sweden 14.285714 4.545455 0.149925
Missing countries:
 United Kingdom

(b) Floating Plastic Debris Density

We use two indicators:

  1. Plastic Waste kg/ha. Target defined by top 3 countries

  2. Recovery Rate of Plastic Packaging. Target defined by top 3 countries

1. Plastic Waste kg/ha

Source

Code
# Plastic Waste kg/hab, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_WASGEN/

wasteG = pd.read_csv("../data/env_wasgen.csv")
years = range(2012, wasteG.TIME_PERIOD.max() + 1)
wasteG["geo"] = wasteG["geo"].map(abbrev_to_country).fillna(wasteG["geo"])
# kilograms per inhabitant only
wasteG = wasteG[
    wasteG["geo"].isin(countriesEU)
    & wasteG["TIME_PERIOD"].isin(years)
    & wasteG["unit"].isin(["KG_HAB"])
]
# The less plastic waste, the better.
target = min(calculate_target(wasteG, "OBS_VALUE", "geo"))
# 0-100 score: target/value, with division-by-zero mapped to a full score
wasteG["target"] = (
    (target / wasteG.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, wasteG.TIME_PERIOD.max()]
wasteG = wasteG.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
wasteG[years][wasteG.index.isin(countries)]
missingCountries(wasteG)
TIME_PERIOD 2012 2016 2020
geo
Belgium 13.559322 12.698413 9.302326
Denmark 42.105263 40.000000 33.333333
Estonia 47.058824 24.242424 25.000000
Finland 47.058824 50.000000 33.333333
France 32.000000 27.586207 22.222222
Germany 25.806452 24.242424 21.621622
Ireland 30.769231 25.806452 20.000000
Latvia 72.727273 25.000000 13.559322
Lithuania 47.058824 25.806452 20.000000
Netherlands 23.529412 25.806452 21.621622
Poland 32.000000 23.529412 13.559322
Portugal 47.058824 26.666667 18.604651
Spain 33.333333 50.000000 42.105263
Sweden 44.444444 25.000000 24.242424
United Kingdom 21.621622 17.777778 NaN
No missing countries
2. Recovery rates for packaging waste, Plastic Packaging

Source

Code
# Recovery rates for packaging waste, Plastic Packaging, Percentage, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ten00062/

wasteR = pd.read_csv("../data/ten00062.csv")
years = range(2012, wasteR.TIME_PERIOD.max() + 1)
wasteR["geo"] = wasteR["geo"].map(abbrev_to_country).fillna(wasteR["geo"])
wasteR = wasteR[wasteR["geo"].isin(countriesEU) & wasteR["TIME_PERIOD"].isin(years)]
# The more recovery, the better.
# note the inverted ratio (value/target) compared with the "less is better" cells
target = max(calculate_target(wasteR, "OBS_VALUE", "geo"))
wasteR["target"] = (
    (wasteR.OBS_VALUE / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, wasteR.TIME_PERIOD.max()]
wasteR = wasteR.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
wasteR[years][wasteR.index.isin(countries)]
missingCountries(wasteR)
TIME_PERIOD 2012 2016 2020
geo
Belgium 92.885772 99.699399 99.599198
Denmark 99.599198 98.296593 73.246493
Estonia 44.088176 85.971944 87.575150
Finland 51.102204 97.394790 99.599198
France 64.128257 64.629259 71.943888
Germany 99.899800 100.000000 100.000000
Ireland 74.448898 79.859719 100.000000
Latvia 38.777555 41.883768 47.995992
Lithuania 38.977956 74.549098 63.527054
Netherlands 98.296593 95.991984 95.090180
Poland 26.252505 54.909820 NaN
Portugal 39.478958 50.000000 57.014028
Spain 53.306613 61.923848 55.611222
Sweden 58.216433 61.823647 50.801603
United Kingdom 38.176353 58.617234 NaN
No missing countries

14.2

Proportion of national exclusive economic zones managed using ecosystem-based approaches

We use two indicators:

  1. Progress of implementation of Maritime Spatial Planning. Categorical data

  2. OHI Biodiversity. No further transformation

1. Progress of implementation of Maritime Spatial Planning

Source

Code
# Maritime Spatial Planning implementation stage, scored as an ordinal scale
msp = pd.read_csv("../data/mspData.csv")
dictScore = {
    "implemented": 4,
    "complete not implemented": 3,
    "under development": 2,
    "not started": 1,
}
mspVal = (
    msp[["Country", "2012", "2016", "2022"]].set_index("Country").replace(dictScore)
)
# rescale the 1-4 ordinal scores to 25-100
mspVal = mspVal * 1 / len(dictScore) * 100
mspVal
missingCountries(mspVal)
2012 2016 2022
Country
Belgium 50.0 100.0 100.0
Germany 100.0 100.0 100.0
Denmark 25.0 25.0 100.0
Estonia 25.0 50.0 100.0
Spain 25.0 25.0 75.0
Finland 25.0 25.0 100.0
France 25.0 25.0 100.0
Ireland 25.0 25.0 100.0
Lithuania 50.0 75.0 100.0
Latvia 50.0 50.0 100.0
Netherlands 100.0 100.0 100.0
Poland 25.0 50.0 100.0
Portugal 25.0 50.0 100.0
Sweden 25.0 50.0 100.0
United Kingdom 25.0 50.0 100.0
No missing countries
2. OHI Biodiversity

Source

Code
# OHI 'Biodiversity' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiBio = pd.read_csv("../data/scoresOHI.csv")
ohiBio = ohiBio[ohiBio["region_name"].isin(countries)]
# keep only the "status" dimension of the Biodiversity goal
ohiBio = ohiBio[(ohiBio.long_goal == "Biodiversity") & (ohiBio.dimension == "status")]
# one row per country, one column per scenario year
ohiBio = ohiBio.pivot_table(
    columns="scenario", index="region_name", values="value", aggfunc="mean"
)

ohiBio[[2012, 2016, 2022]]
missingCountries(ohiBio)
scenario 2012 2016 2022
region_name
Belgium 75.28 72.45 71.84
Denmark 74.24 73.46 72.39
Estonia 82.49 77.99 76.10
Finland 81.00 74.86 73.82
France 72.60 71.76 74.12
Germany 75.06 72.28 71.52
Ireland 69.19 67.30 66.23
Latvia 78.47 73.53 72.59
Lithuania 77.68 72.18 71.80
Netherlands 68.51 67.88 67.97
Poland 69.42 67.46 66.59
Portugal 75.57 73.36 71.87
Spain 72.16 69.76 68.17
Sweden 76.72 75.57 72.54
United Kingdom 69.87 73.08 72.11
No missing countries

14.3

Average marine acidity (pH) measured at agreed suite of representative sampling stations.

We use:

  1. Estimates of sea surface pH. Target defined by top 3 countries

  2. Carbon Emissions per capita. Target defined by top 3 countries

Reconstructed estimates of sea surface pH assessed with GLODAPv2.2021

Source Copernicus

Code
# ph Calculations in pHCountryLevel.ipynb file
phCopernicus = pd.read_csv("../data/phCopernicus.csv", index_col=0)
# We set target pH as pre-industrial level https://www.eea.europa.eu/ims/ocean-acidification
target = 8.2
# score = observed pH as a percentage of the pre-industrial target
phCopernicus = phCopernicus / target * 100
phCopernicus
missingCountries(phCopernicus)
2012 2016 2021
Belgium 98.416817 98.372463 98.339183
Germany 98.233476 98.223317 98.103500
Denmark 98.363012 98.343317 98.238829
Estonia 97.613476 98.026768 97.609902
Spain 98.562378 98.445439 98.370024
Finland 97.765500 97.981232 97.717159
France 98.629780 98.511951 98.424024
Ireland 98.740963 98.616902 98.506000
Lithuania 96.980007 97.417059 97.082190
Latvia 97.096866 97.692573 97.198626
Netherlands 98.596902 98.471293 98.394500
Poland 96.469329 97.056435 96.550528
Portugal 98.583268 98.497232 98.378049
Sweden 97.437732 97.776390 97.489500
United Kingdom 98.674927 98.567567 98.460122
No missing countries

2. Carbon emissions per capita

Several sources (see code)

Code
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/

pop = pd.read_csv("../data/tps00001.csv")
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
pop = pop[pop["geo"].isin(countriesEU)]
# rename so the later merge with the CO2 table is unambiguous
pop.rename(columns={"OBS_VALUE": "population"}, inplace=True)
Code
# CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

co2 = pd.read_csv("../data/env_air_gge.csv")
co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
co2 = co2[
    co2["geo"].isin(countriesEU)
    & (co2.airpol == "CO2")
    & (co2.src_crf == "TOTX4_MEMONIA")
]

# Data to include UK. Data for other countries is almost the same as in the previous dataset
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year (1MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/

co2GCB = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx",
    sheet_name=1,
    skiprows=11,
    index_col=0,
)
# convert from carbon to co2 and from Mt to thousand tonnes
co2GCB = co2GCB * 3.664 * 1_000
# reshape the UK column into the Eurostat long format so it can be appended
co2GCB_UK = (
    pd.DataFrame(co2GCB["United Kingdom"])
    .reset_index()
    .rename(columns={"index": "TIME_PERIOD", "United Kingdom": "OBS_VALUE"})
)
co2GCB_UK["geo"] = "United Kingdom"
co2 = pd.concat([co2, co2GCB_UK], ignore_index=True)
# thousand tonnes co2 to co2 per capita
co2 = pd.merge(co2, pop, on=["geo", "TIME_PERIOD"], how="left")
co2["co2pc"] = co2["OBS_VALUE"] * 1_000 / co2["population"]

years = range(2012, co2.TIME_PERIOD.max() + 1)
co2 = co2[co2.TIME_PERIOD.isin(years)]
# The less co2pc, the better.
target = min(calculate_target(co2, "co2pc", "geo"))
co2["target"] = (target / co2.co2pc * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
years = [2012, 2016, co2.TIME_PERIOD.max()]
co2 = co2.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)

co2[years][co2.index.isin(countries)]
missingCountries(co2)
TIME_PERIOD 2012 2016 2021
geo
Belgium 37.734121 39.459259 41.833681
Denmark 47.376240 51.718478 68.103705
Estonia 26.783744 27.085479 45.744259
Finland 36.906728 40.406167 51.708163
France 61.806654 66.847742 75.572829
Germany 34.783657 35.997628 43.288290
Ireland 41.662118 39.922992 46.737265
Latvia 93.965364 93.972191 91.681403
Lithuania 76.334698 77.951383 72.164336
Netherlands 34.459622 34.839212 42.925952
Poland 42.102450 42.241700 41.185795
Portugal 72.420665 69.650043 88.751684
Spain 58.295610 61.088196 72.095049
Sweden 70.390420 77.794760 95.307931
United Kingdom 47.256895 59.384996 NaN
No missing countries

14.4

Proportion of fish stocks within biologically sustainable levels

We use two indicators:

  1. FMSY/F: catch-weighted average

  2. B/BMSY: catch-weighted average

1. FMSY/F

Data compiled partially manually

Source of FMSY and F is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# ICES stock assessment, Fref/F ratios; two-level header (metric, year)
fmsyF = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
fmsyF = fmsyF[["Fref/F"]] * 100
# drop the metric level, leaving the year columns
fmsyF = fmsyF.droplevel(0, axis=1)
# most recent assessment is for 2022
fmsyF.rename(columns={"most recent": 2022}, inplace=True)
fmsyF = fmsyF[fmsyF.index.isin(countries)]
fmsyF
missingCountries(fmsyF)
Year 2012 2016 2022
Belgium 84.278853 89.253671 97.182818
Estonia 95.038299 82.573446 77.562646
Finland 99.177365 91.229736 82.840916
France 91.013767 93.548510 93.938257
Germany 81.283936 87.261924 90.831432
Ireland 71.053790 62.685203 86.770330
Latvia 94.318542 89.406109 81.381134
Lithuania 91.787307 83.126001 82.901621
Netherlands 85.883804 88.278969 93.834910
Poland 89.263580 89.580011 74.853470
Portugal 78.732711 84.161276 94.842807
Spain 92.064992 88.531210 91.937691
Sweden 91.475417 78.225767 74.962550
United Kingdom 98.135098 95.872798 90.761277
Denmark 95.419499 85.849679 88.529445
No missing countries

2. B/BMSY

Data collected partially manually

Source of BMSY and B is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# ICES stock assessment, B/Bref ratios; same source file as the Fref/F cell
bBmsy = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
bBmsy = bBmsy[["B/Bref"]] * 100
bBmsy = bBmsy.droplevel(0, axis=1)
# most recent assessment is for 2022
bBmsy.rename(columns={"most recent": 2022}, inplace=True)
bBmsy = bBmsy[bBmsy.index.isin(countries)]
bBmsy
missingCountries(bBmsy)
Year 2012 2016 2022
Belgium 91.982255 94.662804 91.871386
Estonia 99.839423 99.933347 95.679809
Finland 99.843072 99.941251 95.909145
France 97.115681 96.936702 96.369335
Germany 90.088417 93.281519 93.439957
Ireland 92.130861 85.437192 89.825424
Latvia 97.564685 99.565412 97.360783
Lithuania 92.382483 99.191985 96.254817
Netherlands 92.736442 89.856357 96.706294
Poland 96.568884 98.135256 92.284320
Portugal 98.746282 97.786405 98.794184
Spain 99.443829 96.930800 96.504370
Sweden 96.430034 97.171833 91.413094
United Kingdom 96.715727 95.636901 97.260299
Denmark 96.031151 95.816545 95.730634
No missing countries

14.5

Coverage of protected areas in relation to marine area

We consider two indicators:

  1. Coverage of Natura 2000 areas in relation to marine areas. Target defined by top 3 countries
  2. Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%). No further transformation

1. Marine protected areas (% of territorial waters)

Source

Code
# EEZ area
# Data: https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# https://www.europarl.europa.eu/factsheets/en/sheet/100/outermost-regions-ors-
outermost = [
    # Portugal
    "Azores",
    "Madeira",
    # Spain
    "Canary Islands",
    # we could include outermost regions of France,
    # but we care about EEZs in the Atlantic, Baltic and North Sea
]
eez = pd.read_csv(
    "../data/EMODnet_EEZ_v11_20210506.csv",
    delimiter=";",
    encoding="latin-1",
)

eez = eez[eez["Territory"].isin(outermost + countries)]
# collapse composite country labels (e.g. mainland + islands) to one name
for country in ["France", "Portugal", "Spain"]:
    eez.loc[eez["Country"].str.contains(country), "Country"] = country
# NOTE(review): errors="ignore" is deprecated in recent pandas — verify version
eez = eez.apply(pd.to_numeric, errors="ignore")
eez = eez[["Country", "Area_km2"]].groupby(["Country"]).sum(numeric_only=True)
eez = eez[eez.index.isin(countries)]
eez
Area_km2
Country
Belgium 3495
Denmark 105021
Estonia 36451
Finland 81553
France 345240
Germany 56763
Ireland 427039
Latvia 28353
Lithuania 6832
Netherlands 64328
Poland 29982
Portugal 1728718
Spain 1007673
Sweden 155347
United Kingdom 731309
Code
# https://www.eea.europa.eu/data-and-maps/dashboards/natura-2000-barometer (in Barometer table)
# Data is divided by year
natura2k = pd.DataFrame()
for root, dirs, files in os.walk("../data/natura2k"):
    for file in files:
        if file.endswith(".csv"):
            tempDF = pd.read_csv(
                os.path.join(root, file), sep="\t", encoding="utf-16", header=1
            )
            # the year is encoded in the file name
            tempDF["year"] = re.findall(r"\d+", file[:-4])[0]
            # BUG FIX: concat belongs inside the .csv branch. At the old
            # indentation a non-CSV file re-appended the previous tempDF
            # (or raised NameError if it was encountered first).
            natura2k = pd.concat([natura2k, tempDF], axis=0).reset_index(drop=True)
# keep the marine-area columns, the year helper, and the unnamed label columns
natura2k = natura2k.loc[:, natura2k.columns.str.contains("marine|year|Unnamed")]
# first data row holds the real column names for the unnamed columns
natura2k.columns = np.concatenate([natura2k.iloc[0, :-1], natura2k.columns[-1:]])
natura2k = natura2k.iloc[1:].reset_index(drop=True)
natura2k.columns = natura2k.columns.str.strip()
natura2k = natura2k.set_index("Country")
natura2k = natura2k[natura2k.index.isin(countries)]
natura2kEEZ = natura2k.merge(eez, left_index=True, right_index=True)
natura2kEEZ = natura2kEEZ.apply(pd.to_numeric, errors="ignore")
natura2kEEZ["naturaEEZ"] = natura2kEEZ["Natura 2000"] / natura2kEEZ["Area_km2"] * 100
# target of 30% MPAs of EEZ
natura2kEEZ["Score"] = (100 * (natura2kEEZ.naturaEEZ) / 30).clip(upper=100)
natura2kEEZ = natura2kEEZ.pivot_table(index="Country", columns="year", values="Score")
natura2kEEZ[[2012, 2016, 2021]]
missingCountries(natura2kEEZ)
year 2012 2016 2021
Country
Belgium 100.000000 100.000000 100.000000
Denmark 60.473620 60.473620 60.473620
Estonia 61.763280 61.763280 61.763280
Finland 29.183476 29.183476 33.278972
France 40.225157 40.247364 100.000000
Germany 100.000000 100.000000 100.000000
Ireland 5.360166 8.007856 8.005514
Latvia 20.315311 51.575965 51.587721
Lithuania 32.933255 76.258782 76.258782
Netherlands 61.139784 78.156738 85.271318
Poland 80.459387 80.448269 80.559447
Portugal 0.507891 6.148101 8.182171
Spain 3.515029 27.920433 28.009086
Sweden 20.023989 43.406052 43.485444
United Kingdom 33.704859 57.729359 NaN
No missing countries

Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%)

Source: official SDG 14 data

Code
# Marine KBA coverage comes straight from the official SDG series ER_MRN_MPA
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
kbaMPA = sdg14check[sdg14check["SeriesCode"] == "ER_MRN_MPA"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
kbaMPA[[2012, 2016, 2022]]
missingCountries(kbaMPA)
TimePeriod 2012 2016 2022
GeoAreaName
Belgium 96.49980 96.84682 96.85317
Denmark 86.66340 86.74673 86.74673
Estonia 97.60969 97.60982 97.62768
Finland 59.78557 60.80889 60.85425
France 62.51162 76.43594 81.88238
Germany 74.06325 74.51850 80.79027
Ireland 75.92082 75.97923 83.16451
Latvia 96.16348 96.16360 96.16360
Lithuania 65.53137 83.51997 83.51997
Netherlands 93.31337 96.64630 96.64630
Poland 87.25113 87.25113 87.31556
Portugal 68.28585 70.43105 70.76779
Spain 62.51987 85.84322 85.86279
Sweden 56.67376 59.73273 60.46026
United Kingdom 80.23372 80.43096 81.23336
No missing countries

14.6

Degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing

We use three indicators:

  1. High-risk subsidies relative to total fishing subsidies. Target defined by top 3 countries
  2. IUU Fishing Index
  3. TAC/Catch.

1. High-risk subsidies relative to total fishing subsidies

Source

Code
# selection of subsidies by level of risk to fishery sustainability
# as per https://stats.oecd.org/Index.aspx?QueryId=121718
# Only highRisk is referenced later in this file (ratio numerator); the other
# three lists document the rest of the OECD risk classification.
highRisk = [
    "I.E.1. Fuel tax concessions",
    "I.A.1. Transfers based on variable input use",
    "I.A.2.1.Support to vessel construction/purchase",
    "I.A.2.2.Support to modernisation",
    "I.A.2.3.Support to other fixed costs",
    "II.A. Access to other countries’ waters",
    "I.B.2. Special insurance system for fishers",
]

moderateRisk = [
    "I.B.1. Income support",
    "I.C. Transfers based on the reduction of productive capacity",
    "II.B.1. Capital expenditures",
    "II.B.2. Subsidized access to infrastructure",
]

uncertainRisk = [
    "I.D. Miscellaneous direct support to individuals and companies",
    "I.E.2. Other tax exemptions",
    "II.D. Support to fishing communities",
    "II.C. Marketing and promotion",
    "II.E. Education and training",
    "II.F. Research and development",
    "II.H. Miscellaneous support for services to the sector",
]


noRisk = [
    "II.G.1. Management expenditures",
    "II.G.2. Stock enhancement programs",
    "II.G.3. Enforcement expenditures",
]
Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE
# selection as per https://stats.oecd.org/Index.aspx?QueryId=121718

fse = pd.read_csv("../data/FISH_FSE.csv")
# NOTE(review): every other cell in this notebook uses `max() + 1`; this range
# excludes the most recent year — confirm whether that is intentional.
years = range(2012, fse.Year.max())
# we use US dollars because some countries use their own currency ('Danish Krone', 'Zloty', 'Swedish Krona')
fse = fse[
    (fse["Country"].isin(countriesEU))
    & (fse["Measure"].isin(["US dollar"]))
    & (fse["Year"].isin(years))
]

# strip variable codes and delete parent variables to avoid double counting
# solution from https://stackoverflow.com/q/76183612/14534411

# NOTE(review): `separator` is assigned but never used in the visible code
separator = "."
# vCode = variable code with its trailing label segment removed
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)

variableAll = fse.vCode.unique().tolist()


def is_parent(p, target):
    """Return True when ``target`` is a strict dotted-code ancestor of ``p``.

    Examples: is_parent("I.A.1", "I.A") -> True;
    is_parent("I.A", "I.A") -> False (not strict);
    is_parent("I.AB", "I.A") -> False (no dot boundary).
    """
    if len(p) <= len(target):
        return False
    return p.startswith(target) and p[len(target)] == "."


# Keep only leaf codes: in the sorted code list, a code is a leaf exactly when
# the code that follows it is not one of its dotted children.
vSupport = []
prev = ""
for s in sorted(variableAll) + [""]:  # "" sentinel flushes the last code
    if prev and not is_parent(s, prev):
        vSupport.append(prev)
    prev = s

fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# We calculate the ratio of high-risk subsidies to total subsidies
fseRisk = fse[fse.Variable.isin(highRisk)]
fseRisk = fseRisk.groupby(["Country", "Year"]).sum(numeric_only=True)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)
fseRiskRatio = (fseRisk / fse * 100).reset_index()
# The less high-risk subsidies the better. We ignore 0 values to calculate meaningful target
target = min(
    calculate_target(fseRiskRatio[fseRiskRatio.Value != 0], "Value", "Country")
)
# score = target/value capped to [0, 100]; countries with 0 high-risk subsidies
# (division by zero -> inf) receive the maximum score of 100
fseRiskRatio["target"] = (
    (target / fseRiskRatio.Value * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, fseRiskRatio.Year.max()]
# NOTE: the double "k" in fseRiskkRatio is a long-standing name referenced in
# the aggregation section below — do not rename casually.
fseRiskkRatio = fseRiskRatio.pivot_table(
    columns="Year", index="Country", values="target", aggfunc="mean"
)

fseRiskkRatio[years][fseRiskkRatio.index.isin(countries)]
missingCountries(fseRiskkRatio)
Year 2012 2016 2019
Country
Belgium 1.139037 2.272171 NaN
Denmark 0.698986 0.715540 0.772050
Estonia 7.391742 100.000000 3.199879
France 1.033066 NaN NaN
Germany NaN 4.147636 NaN
Ireland NaN 9.167110 NaN
Latvia NaN 100.000000 100.000000
Lithuania 1.498189 11.784437 1.451992
Netherlands 9.666849 3.594301 1.575328
Poland 0.441716 0.280389 0.356007
Portugal 1.130799 0.323543 0.479279
Spain 10.216785 3.002641 4.131087
Sweden 0.716534 0.632158 0.989415
United Kingdom 3.810226 4.267987 2.925930
Missing countries:
 Finland

2. IUU Fishing Index

Source

Code
# IUU Fishing Index indicators used for the composite.
# NOTE: entries must match the "Indicator name" column of the source CSV
# byte-for-byte — "Have a NPOA-IUU " keeps its trailing space on purpose.
selectedIUU = [
    "Accepted FAO Compliance Agreement",
    "Mandatory vessel tracking for commercial seagoing fleet",
    "Operate a national VMS/FMC centre",
    "Designated ports specified for entry by foreign vessels",
    "Ratification/accession of UNCLOS Convention",
    "Ratification/accession of UNFSA",
    "Have a NPOA-IUU ",
    "Compliance with RFMO flag state obligations",
    "Compliance with RFMO port state obligations",
]
Code
# IUU Fishing Index indicator scores (2019 and 2021 editions)
iuuIndex = pd.read_csv(
    "../data/iuu_fishing_index_2021-data_and_disclaimer/iuu_fishing_index_2019-2021_indicator_scores.csv"
)
iuuIndex = iuuIndex[
    (iuuIndex["Country"].isin(countries))
    & (iuuIndex["Indicator name"].isin(selectedIUU))
]

# fill a country's missing score with the cross-country mean of that
# indicator within the same year
iuuIndex["Score"] = iuuIndex.groupby(
    ["Year", "Indicator name"], sort=False, group_keys=False
)["Score"].apply(lambda x: x.fillna(x.mean()))
iuuIndex = iuuIndex.pivot_table(
    index=["Country", "Year"], columns="Indicator name", values="Score"
)
# equal weights; sigma=10 = weak-sustainability generalized mean (composite.py)
alpha = 1 / len(iuuIndex.columns)
sigma = 10
compositeIUU = ci.compositeDF(alpha, iuuIndex, sigma)
compositeIUU = pd.DataFrame(compositeIUU, columns=["IUU"])
# The target is the best in the IUU Fishing Index = 1
# (lower index is better, hence the inverse ratio)
compositeIUU["target"] = 1 / compositeIUU.IUU * 100
compositeIUU = compositeIUU.reset_index().pivot_table(
    index="Country", columns="Year", values="target"
)
compositeIUU
missingCountries(compositeIUU)
Year 2019 2021
Country
Belgium 100.000000 64.031078
Denmark 51.245497 65.768667
Estonia 70.952143 70.952143
Finland 53.277658 64.031078
France 54.608684 61.476344
Germany 51.245497 70.952143
Ireland 58.866791 70.952143
Latvia 90.295268 70.952143
Lithuania 51.245497 54.146791
Netherlands 46.306308 51.245497
Poland 72.707739 70.952143
Portugal 47.352555 70.194173
Spain 54.608684 48.688266
Sweden 70.952143 70.952143
United Kingdom 47.352555 41.915876
No missing countries

3. TAC/Catch

Data extracted partially manually.

Source of TAC is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# TAC/Catch ratio as a percentage, from the manually compiled ICES data
# (two-row header: metric on level 0, year on level 1)
tacCatch = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
tacCatch = tacCatch[["TAC/Catch"]] * 100
tacCatch = tacCatch.droplevel(0, axis=1)
# most recent assessment is 2022
tacCatch.rename(columns={"most recent": 2022}, inplace=True)
tacCatch = tacCatch[tacCatch.index.isin(countries)]
tacCatch
missingCountries(tacCatch)
Year 2012 2016 2022
Belgium 78.480130 95.507005 97.255892
Estonia 98.404500 99.435507 96.478689
Finland 98.942568 99.843818 92.122689
France 84.168757 91.279043 97.873527
Germany 97.890177 96.075084 96.254821
Ireland 95.739977 94.352001 97.971417
Latvia 99.389419 99.008097 96.696104
Lithuania 98.155150 98.005229 95.209801
Netherlands 92.986309 95.043900 97.158599
Poland 99.762408 98.797493 94.911954
Portugal 96.065583 97.542063 98.963306
Spain 81.619492 94.504539 98.517713
Sweden 96.705334 98.484897 95.600203
United Kingdom 95.734499 93.625841 98.166726
Denmark 94.385126 96.196738 96.258764
No missing countries

14.7

Sustainable fisheries as a proportion of GDP in small island developing States, least developed countries and all countries

We use two indicators:

  1. ‘Livelihoods & economies’ Index as per Baltic Health Index (BHI)
  2. ‘Tourism’ Index as per BHI

1. ‘Livelihoods & economies’ Index

Divided into:

  1. Economies: Gross Value Added (GVA) annual growth rate of marine sectors against 1.5% target
  2. Livelihoods: Two subindicators: GVA/Hours Worked & FTE Employees/Coastal population

Source Blue Economy

Economies
Code
# Economies indicator, GVA annual growth rate of marine sectors
# GVA, Value added at factor cost - million euro
# https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
gva = pd.read_excel("../data/blueEconomyObservatory/valueAddedFactorCost.xlsx")
gva.rename(
    columns={
        "Value added at factor cost (€ million)": "gva",
        "Member State": "country",
    },
    inplace=True,
)
gva = gva[gva["country"].isin(countriesEU)]
# drop rows without a sector assignment
gva = gva[gva.Sector != "-"]
gvaSector = gva.groupby(["country", "Sector", "Year"]).sum(numeric_only=True)
# year-on-year GVA growth per country and sector
gvaSector["growthRate"] = gvaSector.groupby(["country", "Sector"])["gva"].pct_change()
# target of 1.5% annual growth rate; score capped to [0, 100]
gvaSector["sectorScore"] = (gvaSector["growthRate"] / 0.015 * 100).clip(
    upper=100, lower=0
)
# weight each sector by its share of the country-year total GVA
gvaSector["weight"] = gvaSector["gva"] / gvaSector.groupby(["country", "Year"])[
    "gva"
].transform("sum")

# country-year score = GVA-weighted mean of sector scores
gvaSector["weightedScore"] = gvaSector["sectorScore"] * gvaSector["weight"]
gvaEcon = gvaSector.groupby(["country", "Year"]).sum(numeric_only=True)[
    ["gva", "weightedScore"]
]
years = [2012, 2016, gvaEcon.index.get_level_values("Year").max()]
economies = gvaEcon.reset_index().pivot_table(
    index="country", columns="Year", values="weightedScore"
)
# display assessed countries for the reference years, then report gaps
economies[economies.index.isin(countries)][years]
missingCountries(economies)
Year 2012 2016 2020
country
Belgium 96.873934 21.658872 21.297335
Denmark 4.504192 22.266304 61.905965
Estonia 52.767271 48.123759 54.687999
Finland 35.661048 73.708100 30.287035
France 32.382788 28.159942 0.247155
Germany 38.803670 64.176248 5.469456
Ireland 87.952005 100.000000 28.923979
Latvia 97.411580 2.196562 19.610579
Lithuania 37.153807 61.513965 42.140579
Netherlands 84.178366 21.389797 40.346288
Poland 62.903198 59.160696 80.944135
Portugal 61.985137 96.467062 9.280987
Spain 3.312837 85.334154 0.000000
Sweden 43.109175 71.468051 14.636507
United Kingdom 100.000000 20.980994 0.000000
No missing countries
Livelihoods
Code
# Livelihoods - Quality
# Hours Worked https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
# Select Insight Advisor, search in Asset for the measure and select Dimensions: year, member state

# hoursWorked = pd.read_excel('../data/blueEconomyObservatory/hoursWorked.xlsx')
# hoursWorked.rename(columns={'Member State':'country_name','Year':'year','Hours worked by employees':'hoursWorked'}, inplace=True)
# gvaHoursW = hoursWorked.merge(gvaEcon.reset_index(), on=['country_name','year'], how='left')
# gvaHoursW['gvaHoursW'] = gvaHoursW['value'] / gvaHoursW['hoursWorked'] * 1_000_000
# gvaHoursW = gvaHoursW[['country_name','year','gvaHoursW']].set_index(['country_name','year'])
# gvaHoursW.pivot_table(index='country_name', columns='year', values='gvaHoursW')

# Above, I calculated the GVA per hour worked ratio using the raw data of each variable.
# The result is different from the ratio variable provided by the Blue Economy Observatory.
# I emailed them to ask for clarification.

# use the ready-made GVA-per-hour-worked ratio published by the Observatory
gvaHoursW = pd.read_excel(
    "../data/blueEconomyObservatory/GVAperHourWorked.xlsx"
).set_index(["Year", "Member State"])
gvaHoursW.rename(
    columns={"Gross value added per hour worked by employees": "gvaHoursW"},
    inplace=True,
)
# coerce non-numeric entries (placeholders) to NaN
gvaHoursW["gvaHoursW"] = gvaHoursW["gvaHoursW"].apply(pd.to_numeric, errors="coerce")
# Comparative price levels https://ec.europa.eu/eurostat/databrowser/view/TEC00120/default/table?lang=en
pppEurostat = pd.read_csv("../data/pppEurostat.csv")
pppEurostat.geo = pppEurostat.geo.replace(abbrev_to_country)
pppEurostat = (
    pppEurostat[pppEurostat.geo.isin(countriesEU)]
    .rename(columns={"geo": "Member State", "TIME_PERIOD": "Year", "OBS_VALUE": "ppp"})[
        ["ppp", "Member State", "Year"]
    ]
    .set_index(["Member State", "Year"])
)
# NOTE(review): the two MultiIndexes have swapped level order
# ((Year, Member State) vs (Member State, Year)) — the merge appears to align
# by level values; confirm the join matches on the intended levels.
gvaHoursW = gvaHoursW.merge(pppEurostat, left_index=True, right_index=True)
# adjust for comparative price levels (PPP) so countries are comparable
gvaHoursW["gvaHoursWppp"] = gvaHoursW["gvaHoursW"] / gvaHoursW["ppp"] * 100
gvaHoursW = gvaHoursW[
    gvaHoursW.index.get_level_values("Year").isin(range(2012, 2023))
].reset_index()

# We set target using top 3 countries. The more gva/hours worked the better.
target = max(calculate_target(gvaHoursW, "gvaHoursWppp", "Member State"))
gvaHoursW["target"] = (
    (gvaHoursW.gvaHoursWppp / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, gvaHoursW.Year.max()]
gvaHoursW = gvaHoursW.pivot_table(index="Member State", columns="Year", values="target")
# display assessed countries for the reference years, then report gaps
gvaHoursW[years][gvaHoursW.index.isin(countries)]
missingCountries(gvaHoursW)
Year 2012 2016 2020
Member State
Belgium 73.226756 70.153897 79.084103
Estonia 13.004557 13.749318 18.786201
Finland 26.215711 27.738387 27.423564
Germany 36.439759 42.221763 48.441349
Ireland 23.178746 24.868589 22.363956
Latvia 7.777087 8.912797 13.547465
Lithuania 9.675183 15.099927 19.144132
Netherlands 100.000000 64.869507 46.389875
Poland 19.237395 20.772320 27.250130
Portugal 14.911299 16.776657 17.068778
Spain 25.501543 23.418572 26.326202
Sweden 30.692363 34.465893 36.985960
United Kingdom 86.269042 52.194932 67.755189
Missing countries:
 Denmark
France
Code
# Livelihoods - Quantity
# FTE employees https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en

fteEmployment = pd.read_excel("../data/blueEconomyObservatory/employmentFTE.xlsx")
fteEmployment.rename(
    columns={
        "Member State": "country_name",
        "Year": "year",
        "Employees in full time equivalent units": "fteEmployees",
    },
    inplace=True,
)
# Coastal areas https://ec.europa.eu/eurostat/web/coastal-island-outermost-regions/methodology
coastArea = pd.read_excel("../data/NUTS2021Coastal.xlsx", sheet_name="Coastal regions")[
    ["NUTS_ID", "COASTAL CATEGORY"]
]
coastArea.rename(
    columns={"NUTS_ID": "geo", "COASTAL CATEGORY": "coastal"}, inplace=True
)
# Population https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_PJANAGGR3__custom_7060174/default/table?lang=en
population = pd.read_csv("../data/demoNUTS3.csv")

coastalPop = coastArea.merge(population, on="geo", how="left")
# France and UK have three letter abbreviation codes;
# take the first two letters of the NUTS id as the country code
coastalPop["country"] = coastalPop.geo.str.extract(r"([a-zA-Z]{2})").replace(
    abbrev_to_country
)
coastalPop = coastalPop[coastalPop.country.isin(countriesEU)]
coastalPop = coastalPop.dropna(subset=["TIME_PERIOD"])
coastalPop["TIME_PERIOD"] = coastalPop["TIME_PERIOD"].astype(int)
# aggregate NUTS-level population to country x year x coastal category
coastalPop = (
    coastalPop.groupby(
        [
            "country",
            "TIME_PERIOD",
            "coastal",
        ]
    )
    .sum(numeric_only=True)
    .reset_index()
)
# 0 non-coastal, 1  coastal (on coast), 2 coastal (>= 50% of population living within 50km of the coastline)
coastalPop = (
    coastalPop[coastalPop["coastal"].isin([1, 2])]
    .groupby(["country", "TIME_PERIOD"])
    .sum()
)
fteCoastalPop = coastalPop.merge(
    fteEmployment,
    left_on=["country", "TIME_PERIOD"],
    right_on=["country_name", "year"],
    how="inner",
)
# blue-economy FTE employees per coastal inhabitant (OBS_VALUE = population)
fteCoastalPop["fteCoastalPop"] = (
    fteCoastalPop["fteEmployees"] / fteCoastalPop["OBS_VALUE"]
)

# We set target using top 3 countries. The more FTE employees/Coastal population the better
target = max(calculate_target(fteCoastalPop, "fteCoastalPop", "country_name"))
fteCoastalPop["target"] = (
    (fteCoastalPop.fteCoastalPop / target * 100)
    .replace([np.inf, -np.inf], 100)
    .clip(0, 100)
)
years = [2012, 2016, fteCoastalPop.year.max()]
fteCoastalPop = fteCoastalPop.pivot_table(
    index="country_name", columns="year", values="target"
)

# display assessed countries for the reference years, then report gaps
fteCoastalPop[years][fteCoastalPop.index.isin(countries)]
missingCountries(fteCoastalPop)
year 2012 2016 2020
country_name
Belgium 8.301720 7.469303 7.612006
Denmark 15.210299 16.648394 16.145873
Estonia 73.518596 62.530963 41.382950
Finland 17.422097 14.952275 11.662557
France 15.224464 14.087975 12.409121
Germany 53.966767 60.393138 61.643754
Ireland 5.668473 8.099165 8.990206
Latvia 23.222019 26.817885 22.008593
Lithuania 73.725091 79.037007 91.062719
Netherlands 20.941432 12.987307 15.318658
Poland 36.379926 34.737284 35.336342
Portugal 15.794767 18.095508 17.850648
Spain 23.014225 24.831960 18.070642
Sweden 10.492028 11.198808 10.685918
United Kingdom 10.121654 9.924725 NaN
No missing countries

2. Tourism: GVA/nights spent

Code
# Tourism: GVA/nights spent
# Nights spent at tourist accommodation establishments in coastal areas
# https://ec.europa.eu/eurostat/databrowser/view/TOUR_OCC_NIN2DC__custom_7065857/default/table?lang=en

nightCoast = pd.read_csv("../data/nightsAccomoCoastal.csv")
nightCoast.geo = nightCoast.geo.replace(abbrev_to_country)
nightCoast = nightCoast[nightCoast.geo.isin(countriesEU)]
# GVA of the Accommodation sub-sector only (gva loaded in the Economies cell)
gvaAccomodation = gva[gva["Sub-sector"] == "Accommodation"]
gvaNights = gvaAccomodation.reset_index().merge(
    nightCoast,
    left_on=["country", "Year"],
    right_on=["geo", "TIME_PERIOD"],
    how="inner",
)
# euros of accommodation GVA per night spent (gva is in million EUR)
gvaNights["gvaNights"] = gvaNights["gva"] * 1_000_000 / gvaNights["OBS_VALUE"]
# We set target using top 3 countries. The more GVA/nights spent the better
target = max(calculate_target(gvaNights, "gvaNights", "country"))
gvaNights["target"] = (
    (gvaNights.gvaNights / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2012, 2016, gvaNights.Year.max()]
gvaNights = gvaNights.pivot_table(index="country", columns="Year", values="target")

# display assessed countries for the reference years, then report gaps
gvaNights[years][gvaNights.index.isin(countries)]
missingCountries(gvaNights)
Year 2012 2016 2020
country
Belgium 45.334240 38.058850 41.541088
Denmark 81.889205 93.267237 100.000000
Estonia 58.620529 58.491705 75.930535
Finland 65.419935 79.994586 55.925323
France 47.093545 47.012451 42.348381
Germany 70.673069 71.550445 85.368497
Ireland NaN 85.079465 100.000000
Latvia 38.809039 53.736533 47.795775
Lithuania 33.817135 52.212723 48.222261
Netherlands 39.665778 40.625908 31.666511
Poland 60.057201 41.072395 27.725916
Portugal 53.680933 57.461233 67.520145
Spain 53.868019 56.316363 44.842960
Sweden 62.006181 75.725888 74.427793
United Kingdom 59.320101 39.530034 NaN
No missing countries

14.a

Proportion of total research budget allocated to research in the field of marine technology

We use two indicators:

  1. Official UNSD indicator ER_RDE_OSEX. Target is top 3 countries
  2. SAD/TAC: Catch-weighted TAC relative to Scientific Advice

1. Ocean science expenditure ER_RDE_OSEX

Several sources

Note: ER_RDE_OSEX data comes from Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 data is available in the UNstats archive (download csv for 29-Mar-19)

Code
# %%time
# National ocean science expenditure as a share of total research and development funding (%), UNstats archive (years 2009-2012)
# https://unstats.un.org/sdgs/indicators/database/archive

# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)

# load the pre-extracted archive slice and reshape to country x year
oResearchOld = pd.read_csv("../data/archive14a1.csv")
oResearchOld = oResearchOld.pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total research and development funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database

# read official data and merge with archive data
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# outer merge: overlapping year columns get _x (archive) / _y (new) suffixes
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe
oResearch = oResearch[oResearch.index.isin(countriesEU)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report
# (assumes the suffixed labels are the strings "2013_x"/"2013_y")
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
oResearch = oResearch[list(range(2013, 2018))]
# weighted by EEZ area
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )

# long format: one row per country-year
oResearch = (
    oResearch.drop("Area_km2", axis=1)
    .stack()
    .reset_index()
    .rename(columns={0: "oResearch", "level_1": "Year", "level_0": "country"})
)

# We set target with top 3 countries. The more ocean science expenditure/total R&D funding, the better.
target = max(calculate_target(oResearch, "oResearch", "country"))
oResearch["target"] = (
    (oResearch.oResearch / target * 100).replace([np.inf, -np.inf], 100).clip(0, 100)
)
years = [2013, 2016, oResearch.Year.max()]
oResearch = oResearch.pivot_table(index="country", columns="Year", values="target")

# display assessed countries for the reference years, then report gaps
oResearch[years][oResearch.index.isin(countries)]
missingCountries(oResearch)
Year 2013 2016 2017
country
Belgium 0.310608 0.233153 0.221183
Finland 6.119042 6.207167 6.142723
France 65.177100 NaN 43.513303
Germany 4.346289 3.390835 3.315717
Ireland NaN NaN 100.000000
Netherlands 3.818245 3.248525 3.598533
Poland 1.229127 0.853481 0.823581
Portugal NaN 100.000000 NaN
Spain 68.138571 100.000000 100.000000
United Kingdom 100.000000 94.759160 100.000000
Missing countries:
 Denmark
Estonia
Lithuania
Latvia
Sweden

2. SAD/TAC

Data extracted partially manually.

Source of TAC and SAD is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

If a country fishes only on fish stocks where assignment of TAC follows scientific advice, it would score 100

Code
# SAD/TAC: catch-weighted TAC relative to Scientific Advice, as a percentage,
# from the manually compiled ICES data (two-row header: metric, year)
sadTac = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
sadTac = sadTac[["SAD/TAC"]] * 100
sadTac = sadTac.droplevel(0, axis=1)
# most recent assessment is 2022; use the integer 2022 (not the string "2022")
# for consistency with the identical rename in the TAC/Catch cell above —
# downstream stacking casts Year labels with astype(int) either way
sadTac.rename(columns={"most recent": 2022}, inplace=True)
sadTac = sadTac[sadTac.index.isin(countries)]
sadTac
missingCountries(sadTac)
Year 2012 2016 2022
Belgium 98.292746 97.071052 96.615720
Estonia 92.753209 86.254153 90.900148
Finland 99.440656 97.749144 95.619503
France 90.323954 91.459570 92.379607
Germany 90.655052 85.395915 87.737917
Ireland 98.126385 87.353687 75.753058
Latvia 91.715604 82.541315 91.324412
Lithuania 96.064711 84.378674 88.919591
Netherlands 95.566376 88.761011 89.214834
Poland 94.916995 80.769729 86.833090
Portugal 98.862330 94.596274 91.951678
Spain 91.262037 84.589603 88.343857
Sweden 90.155366 89.159207 87.337151
United Kingdom 79.451505 81.326929 82.954885
Denmark 84.660093 77.694543 84.036427
No missing countries

14.b

Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small‐scale fisheries

We use two indicators:

  1. OHI Artisanal Fishing Opportunities Index: No further transformation
  2. Percentage of Fish Species Threatened: No further transformation

1. OHI ‘Artisanal opportunities’ Index

Source

Code
# OHI 'Artisanal opportunities' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiArt = pd.read_csv("../data/scoresOHI.csv")
ohiArt = ohiArt[ohiArt["region_name"].isin(countries)]
# keep only the status dimension of the 'Artisanal opportunities' goal
ohiArt = ohiArt[
    (ohiArt.long_goal == "Artisanal opportunities") & (ohiArt.dimension == "status")
]
# country x scenario-year matrix; scores are used as-is (no transformation)
ohiArt = ohiArt.pivot_table(
    columns="scenario", index="region_name", values="value", aggfunc="mean"
)

# display the three reference years, then report gaps
ohiArt[[2012, 2016, 2022]]

missingCountries(ohiArt)
scenario 2012 2016 2022
region_name
Belgium 79.51 78.43 78.66
Denmark 70.50 76.03 74.63
Estonia 89.39 90.36 99.24
Finland 84.67 83.31 77.45
France 77.09 77.59 78.61
Germany 73.21 72.73 73.23
Ireland 69.90 73.52 73.52
Latvia 87.40 88.85 99.01
Lithuania 88.71 89.95 97.63
Netherlands 55.01 55.29 60.35
Poland 87.82 74.67 77.61
Portugal 77.81 64.99 66.08
Spain 73.45 71.37 73.08
Sweden 94.77 95.51 97.37
United Kingdom 81.35 84.41 79.79
No missing countries

2. Percentage of Fish Species Threatened

Source

Analysis done in iucnRedList.ipynb

Code
# Percentage of Fish Species Threatened
# Time series comparison is tricky: https://www.iucnredlist.org/assessment/red-list-index

# threatenedScore is precomputed in iucnRedList.ipynb; reshape to country x year
threatenedFish = pd.read_csv("../data/threatenedFishIUCN.csv")
threatenedFish = threatenedFish.pivot_table(
    index="name", columns="year", values="threatenedScore"
)
threatenedFish

missingCountries(threatenedFish)
year 2012 2016 2022
name
Belgium 77.777778 87.786260 84.967320
Denmark 83.516484 89.944134 86.138614
Estonia 80.952381 89.473684 90.243902
Finland 81.818182 89.189189 90.000000
France 88.135593 93.216080 91.332611
Germany 77.611940 87.022901 82.894737
Ireland 86.592179 93.750000 91.885965
Latvia 77.272727 87.804878 88.636364
Lithuania 77.272727 87.500000 88.372093
Netherlands 83.132530 91.489362 89.047619
Poland 76.923077 87.234043 86.274510
Portugal 89.573460 94.731065 92.578125
Spain 89.938398 94.646465 92.513863
Sweden 77.272727 87.121212 84.027778
United Kingdom 86.729858 93.827160 91.337100
No missing countries

14.c

Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea

We use one indicator:

  1. Participation in agreements of the International Marine Organization (IMO Participation Rate). No further transformation

1. Participation in agreements of the International Marine Organization

Source

Code
# dictionary for country codes used by the GISIS
# CSV rows are (code, country name) pairs
with open("../data/gisisDict.csv") as csv_file:
    code_to_country = dict(csv.reader(csv_file))

# invert so the mapping reads country name -> GISIS code
gisisDict = {country: code for code, country in code_to_country.items()}
Code
# Participation in agreements of the International Marine Organization

# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# We get the historical data from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# You need to create account to access data.

# I tried to scrape the data but I am getting errors with Selenium and bs4.
# I downloaded the html manually

# restrict the GISIS code mapping to the countries in the assessment
gisisCountries = {name: code for name, code in gisisDict.items() if name in countries}
# one ratification-status URL per assessed country
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(code)
    for code in gisisCountries.values()
]

# for url in listIMO:
#     webbrowser.open(url)
Code
# Per-country count of IMO treaties in force in 2012, 2016 and 2021
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2016, 2021])

# loop thru the html files in the folder and extract the data
for i in range(len(os.listdir("../data/treatiesIMO/"))):
    countryIMO = np.nan
    # table index 4 of the saved GISIS page holds the ratification table
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            i
        )
    )[4]
    # get the country name from the html file name by checking if string is in the list of countries
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    if countryIMO is not np.nan:
        # the second row holds the real header; drop the two header rows
        imoRate.columns = imoRate.iloc[1]
        imoRate = imoRate[2:]
        # new columns with the year of accession and denounced
        imoRate["accession"] = (
            imoRate["Date of entry into force in country"]
            .str.extract("^([^(]+)")
            .fillna("")
        )
        imoRate["denounced"] = imoRate[
            "Date of entry into force in country"
        ].str.extract(".*\\:(.*)\\).*")
        imoRate[["accession", "denounced"]] = (
            imoRate[["accession", "denounced"]]
            .apply(pd.DatetimeIndex)
            .apply(lambda x: x.dt.year)
        )
        # count the number of treaties each country accessioned and didn't denounce
        # by 2012, 2016 and 2021; use `yr` (not `i`) so the file-index loop
        # variable above is no longer shadowed
        for yr in (2012, 2016, 2021):
            imoRate[str(yr)] = np.where(
                (imoRate.accession < yr)
                & ((imoRate.denounced > yr) | (imoRate.denounced.isna())),
                1,
                0,
            )
        imoCount = (
            countryIMO,
            imoRate["2012"].sum(),
            imoRate["2016"].sum(),
            imoRate["2021"].sum(),
        )
        imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount
# calculate total possible treaties, apply dif-ref and convert to percentage
# NOTE(review): targetIMO comes from whichever country's table was processed
# last, counting only treaties that country has an entry-into-force date for —
# confirm every saved table lists the same treaty set.
targetIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))
imoRatedf = imoRatedf.set_index("Country").sort_index()
imoRatedf = 100 * imoRatedf / targetIMO
imoRatedf[imoRatedf > 100] = 100
# astype(float64) already yields numeric columns; the former extra
# `.apply(pd.to_numeric)` pass was redundant and has been dropped
imoRatedf = imoRatedf.astype(np.float64)
imoRatedf

missingCountries(imoRatedf)
2012 2016 2021
Country
Belgium 78.431373 76.470588 90.196078
Denmark 80.392157 86.274510 92.156863
Estonia 76.470588 78.431373 82.352941
Finland 76.470588 78.431373 90.196078
France 84.313725 84.313725 96.078431
Germany 80.392157 82.352941 88.235294
Ireland 66.666667 68.627451 68.627451
Latvia 80.392157 80.392157 82.352941
Lithuania 58.823529 62.745098 64.705882
Netherlands 84.313725 86.274510 92.156863
Poland 80.392157 84.313725 86.274510
Portugal 68.627451 76.470588 86.274510
Spain 86.274510 86.274510 88.235294
Sweden 82.352941 90.196078 94.117647
United Kingdom 78.431373 78.431373 78.431373
No missing countries

Indicators aggregation

Given our ratio-scale fully comparable indicators $I_{it}$, meaningful aggregation of $N$ indicators into a composite indicator $CI_t$ is obtained according to social choice theory by applying a generalized mean:

$$CI_t(\alpha_{it},I_{it},\sigma)=\left(\sum^{N}_{i=1}\alpha_{it}\,I^{\frac{\sigma-1}{\sigma}}_{it}\right)^{\frac{\sigma}{\sigma-1}} \quad \text{for} \quad t = 2012,\,2016,\,2021 \text{ (or most recent)}$$

with weights $\alpha_{it} > 0$ and $0 \leq \sigma \leq \infty$. The parameter $\sigma$ quantifies the elasticity of substitution between the different indicators. High (low) values of $\sigma$ imply good (poor) substitution possibilities, which means that a high score in one indicator can (cannot) compensate a low score in another indicator. Consequently, high and low values of $\sigma$ correspond to the concepts of weak and strong sustainability, respectively. Depending on $\sigma$, one can obtain a full class of specific functional forms for the composite indicator.

We define:

$\sigma_{Target} = 0.5$ and $\sigma_{Target} = 10$

Code
# Map indicator name -> indicator DataFrame. A single literal dict replaces
# the previous pair of parallel lists (varDf / varNames) zipped together:
# with parallel lists, any reordering or insertion in one list silently
# mislabelled every indicator after it.
dictIndicators = {
    "nitro": nitro,
    "phospho": phospho,
    "eutro": eutro,
    "wasteG": wasteG,
    "wasteR": wasteR,
    "mspVal": mspVal,
    "ohiBio": ohiBio,
    "co2": co2,
    "phCopernicus": phCopernicus,
    "fmsyF": fmsyF,
    "bBmsy": bBmsy,
    "kbaMPA": kbaMPA,
    "natura2kEEZ": natura2kEEZ,
    "fseRiskkRatio": fseRiskkRatio,
    "tacCatch": tacCatch,
    "compositeIUU": compositeIUU,
    "fteCoastalPop": fteCoastalPop,
    "gvaHoursW": gvaHoursW,
    "economies": economies,
    "gvaNights": gvaNights,
    "oResearch": oResearch,
    "sadTac": sadTac,
    "ohiArt": ohiArt,
    "threatenedFish": threatenedFish,
    "imoRatedf": imoRatedf,
}
# keep the old names in scope (same order) for any downstream code using them
varNames = list(dictIndicators)
varDf = list(dictIndicators.values())
# stack variables in each dataframe: country x year wide tables become
# long frames indexed by (Country, Year) with one named value column
for name, df in dictIndicators.items():
    df = df.stack().to_frame().rename(columns={0: str(name)})
    df.index.names = ["Country", "Year"]
    df.reset_index(inplace=True)
    # year labels may be int or str depending on the source table
    df.Year = df.Year.astype(int)
    df.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = df
# merge all variables into one dataframe
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
years = list(range(2012, indicators.index.get_level_values("Year").max() + 1))
indicators = indicators.reset_index().sort_values(["Country", "Year"])
indicators = indicators[
    (indicators.Year.isin(years)) & (indicators.Country.isin(countries))
]
# persist the raw (un-imputed) values; used later for the progress assessment
indicators.to_csv("../output/data/indicatorsOriginal.csv", index=False)
indicatorsOriginal = indicators.copy()
# We fill missing values using nearest, then forward and back fill by country
# We care about 2012, 2016, and most recent. Nearest only affects fseRiskRatio
# Solution from https://stackoverflow.com/a/70363774/14534411
# NOTE(review): np.isnan is applied to a frame that still contains the
# object-dtype 'Country' column — this relies on the installed pandas/numpy
# versions tolerating it; re-check after upgrading either library.
indicators = indicators.groupby(["Country"], group_keys=True).apply(
    lambda group: group.interpolate(
        method="nearest",
        axis=0,
    )
    if np.count_nonzero(np.isnan(group)) < (len(group) - 1)
    else group
)
# linear interpolation with both-ways fill catches leading/trailing gaps
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda group: group.interpolate(method="linear", axis=0, limit_direction="both")
)
indicators.index = indicators.index.droplevel(1)
# We then use mean of other countries to fill countries missing all values. This affects:
# Finland (fseRIskRatio), France (gvaHoursW), Denmark (gvaHoursW, oResearch)
# Estonia, Latvia, Lithuania, Sweden (oResearch)
indicators = indicators.groupby("Year", group_keys=False).apply(
    lambda x: x.fillna(x.mean())
)
indicators = indicators.set_index("Year", append=True)
# 14.1 and 14.7 have subindicators that need to be combined into a composite indicator
compoIndicaDict = {
    "plastic": ["wasteG", "wasteR"],
    "nutrient": ["nitro", "phospho"],
    "livelihoods": ["fteCoastalPop", "gvaHoursW"],
}
# calculate composite indicators using weak sustainability (equal weights,
# sigma=10). Function in compositeIndicators.py
for target, indicator in compoIndicaDict.items():
    try:
        alpha = 1 / len(compoIndicaDict[target])
        df = indicators[indicator]
        sigma = 10
        indicators[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        print("Missing indicator for", target)
indicators.to_csv("../output/data/indicatorsImputed.csv", index=True)

Target level aggregation

Code
# SDG 14 target -> its constituent indicator columns
targetsDict = {
    "14.1 Pollution": ["nutrient", "eutro", "plastic"],
    "14.2 Ecosystem": ["mspVal", "ohiBio"],
    "14.3 Acidification": ["co2", "phCopernicus"],
    "14.4 Sustainable Fishing": ["fmsyF", "bBmsy"],
    "14.5 Protection": ["kbaMPA", "natura2kEEZ"],
    "14.6 Incentives": ["fseRiskkRatio", "tacCatch", "compositeIUU"],
    "14.7 Economics": ["livelihoods", "economies", "gvaNights"],
    "14.a Science": ["oResearch", "sadTac"],
    "14.b Small-scale Fishing": ["ohiArt", "threatenedFish"],
    "14.c Marine Agreements": ["imoRatedf"],
}
# aggregate each target with equal weights and sigma=10 (weak sustainability)
targets = indicators.copy()
for target, indicator in targetsDict.items():
    try:
        alpha = 1 / len(targetsDict[target])
        df = targets[indicator]
        sigma = 10
        targets[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        print("Missing indicator for", target)
# keep only the aggregated target columns (their names contain "14")
targets = targets[[i for i in targets if re.findall(r"14", i)]]
targets.to_csv("../output/data/targetsComposite.csv", index=True)

Goal Level Aggregation (Monte Carlo simulation)

Code source is in the composite.py file

Code
%%time
# calculate composite score for each target importing the function from composite.py
# monte carlo for strong sustainability and one sigma for weak sustainability
mostRecent = targets.index.get_level_values("Year").max()
# strong sustainability: Monte Carlo draws of the substitution elasticity
scoresStrong = ci.compositeMC(df = targets, years=[2012, 2016, mostRecent], simulations=10_000)
# weak sustainability: plain equally-weighted arithmetic mean of the targets
scoresWeak = pd.DataFrame((1/len(targets.columns) * targets).sum(axis=1), columns=['scoreWeak'])
# alternative weak score with sigma=10 -- NOTE(review): computed but not merged below
scoresWeak10 = pd.DataFrame(ci.compositeDF(alpha=1/len(targets.columns), df=targets, sigma=10), columns=['scoreWeak'])
# restrict the Monte Carlo results to the assessed countries
scoresStrong = scoresStrong[scoresStrong.index.get_level_values('Country').isin(countries)]
CPU times: user 19.2 s, sys: 4.16 ms, total: 19.2 s
Wall time: 19.2 s
Code
# merge all data
# inner-join indicators, targets and both score variants on (Country, Year)
data_frames = [indicators, targets, scoresStrong, scoresWeak]
finalData = reduce(
    lambda left, right: pd.merge(
        left, right, left_index=True, right_index=True, how="inner"
    ),
    data_frames,
).reset_index()

# translate score to ranking
finalData["rankMean"] = finalData.groupby("Year")["scoreMean"].rank(ascending=False)
# convert the score std (0-100 scale) into rank units
# NOTE(review): heuristic scaling -- confirm this is the intended uncertainty measure
finalData["rankStd"] = finalData["scoreStd"] * len(finalData["Country"].unique()) / 100
finalData["rankWeak"] = finalData.groupby("Year")["scoreWeak"].rank(ascending=False)
finalData.set_index(["Country", "Year"], inplace=True)
finalData.to_csv("../output/data/finalData.csv", index=True)

Progress assessment

Code
# Progress assessment: fit a linear trend (OLS over years) for every
# (country, indicator) pair of the original, non-imputed data.
progress = indicatorsOriginal.copy().set_index("Country")

# progress = progress[progress.Country=='Germany']
# progress["YearDt"] = pd.to_datetime(progress.Year, format='%Y')
# progress['date_ordinal'] = pd.to_datetime(progress['YearDt']).map(dt.datetime.toordinal)
# silence divide/invalid warnings from degenerate series; the returned previous
# settings are displayed by the notebook (ast_node_interactivity = "all")
np.seterr(divide="ignore", invalid="ignore")

# Accumulate one row per regression in a plain list and build the DataFrame
# once at the end. BUGFIX/PERF: the previous version called pd.concat inside
# the double loop, which copies the whole frame on every iteration (quadratic).
_trendCols = ["Country", "Indicator", "Slope", "Intercept", "Rvalue", "Pvalue", "Stderr"]
_trendRows = []
for country in progress.index.unique():
    for indicator in progress.columns.drop(["Year"]):
        dfTemp = progress[(progress.index == country)][["Year", indicator]]
        # skip series with no observations at all
        if dfTemp[indicator].isnull().values.all():
            continue
        # regress only on the non-missing years
        mask = ~np.isnan(dfTemp[indicator])
        statsDF = stats.linregress(dfTemp["Year"][mask], dfTemp[indicator][mask])
        _trendRows.append(
            [
                country,
                indicator,
                statsDF.slope,
                statsDF.intercept,
                statsDF.rvalue,
                statsDF.pvalue,
                statsDF.stderr,
            ]
        )
progressTrend = pd.DataFrame(_trendRows, columns=_trendCols)
progressTrend.to_csv("../output/data/progressTrend.csv", index=False)

# progresTrendMean = progressTrend.groupby('Country').mean(numeric_only=True)
# mask = ~np.isnan(progress['nitro'])
# [stats.linregress(progress['Year'][mask], progress['nitro'][mask])]


# progress = progress[['kbaMPA']].dropna()
# progress
# progress.apply(lambda x: stats.linregress(progress.index, x)).rename(index={0: 'slope', 1:
#                                                                                   'intercept', 2: 'rvalue', 3:
#                                                                                   'p-value', 4:'stderr'})
# fig = px.scatter(df, progress['Year'], progress['nitro'], trendline="ols")
# fig.show()
{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}
Code
# For the plots, we stack all variables and categorize indicators by target
# Map raw indicator column names to human-readable figure labels.
# BUGFIX: the key "economies" appeared twice in this literal; Python silently
# keeps only the last occurrence, so the duplicate entry has been removed.
indicatorNames = {
    "nutrient": "Nutrient Balance",
    "eutro": "Eutrophication",
    "plastic": "Plastic Pollution",
    "economies": "Economies",
    "mspVal": "Spatial Planning",
    "ohiBio": "Biodiversity",
    "co2": "CO2 Emissions",
    "phCopernicus": "Acidification",
    "fmsyF": "Fmsy/F",
    "bBmsy": "B/Bmsy",
    "kbaMPA": "Biodiversity Areas",
    "natura2kEEZ": "Natura 2000",
    "fseRiskkRatio": "Subsidies",
    "tacCatch": "TAC/Catch",
    "compositeIUU": "Illegal Fishing",
    "livelihoods": "Livelihoods",
    "gvaNights": "Tourism",
    "oResearch": "Marine Research",
    "sadTac": "SAD/TAC",
    "ohiArt": "Artisanal Fishing",
    "threatenedFish": "Threatened Fish",
    "imoRatedf": "IMO Agreements",
    "seaMining": "Seabed Mining",
}

# Long-format table: one row per (Country, Year, indicator) with its value.
plotsData = (
    finalData.stack().reset_index().rename(columns={0: "value", "level_2": "indicator"})
).set_index(["Country", "Year"])

# One row per (target, indicator) pair, exploded from targetsDict.
targetIndicatorsPair = (
    pd.DataFrame.from_dict(targetsDict.items())
    .set_index(0)
    .explode(1)
    .reset_index()
    .rename(columns={0: "target", 1: "indicator"})
)
# attach the parent target to each indicator row (outer keep rows without a target)
plotsData = (
    plotsData.reset_index()
    .merge(targetIndicatorsPair, on="indicator", how="outer")
    .set_index(["Country", "Year"])
)
# replace raw indicator names with their figure labels where available
plotsData.indicator = plotsData.indicator.map(indicatorNames).fillna(
    plotsData["indicator"]
)
mostRecent = plotsData.index.get_level_values("Year").max()
Code
# Aggregate the indicator trends: first average within each target,
# then average the target trends to one row per country.
progressTarget = progressTrend.merge(
    targetIndicatorsPair, left_on="Indicator", right_on="indicator", how="inner"
)
# keep only SDG 14 targets
progressTarget = progressTarget[progressTarget.target.str.startswith("14")]
progressTarget = (
    progressTarget.groupby(["target", "Country"]).mean(numeric_only=True).reset_index()
)
progressTarget = progressTarget.groupby("Country").mean(numeric_only=True).reset_index()
progressTarget
Country Slope Intercept Rvalue Pvalue Stderr
0 Belgium -0.165177 410.831174 0.069701 0.381595 0.670321
1 Denmark 0.610405 -1158.714460 0.151814 0.386430 1.011044
2 Estonia 1.467129 -2887.786365 0.263440 0.327063 0.865248
3 Finland 0.710325 -1370.097317 0.063184 0.348336 0.532169
4 France 0.641057 -1223.960924 0.291571 0.234033 1.181515
5 Germany 0.109802 -149.265850 0.136830 0.366686 0.926220
6 Ireland 0.278099 -490.381043 0.237817 0.197020 0.846996
7 Latvia 0.733923 -1406.003104 0.142230 0.330611 1.303405
8 Lithuania 1.049761 -2048.124439 0.246228 0.314357 0.896504
9 Netherlands -0.300283 676.042944 0.246888 0.272813 0.904405
10 Poland 0.727603 -1402.720108 -0.004115 0.247569 0.613606
11 Portugal 0.971222 -1893.285099 0.192914 0.210846 0.463763
12 Spain 0.741538 -1428.585380 0.098117 0.345197 1.119423
13 Sweden 0.824298 -1593.598566 0.079965 0.252301 0.539738
14 United Kingdom 0.007181 57.615156 0.097329 0.321493 0.603858

Plots

Code
# We want the EEZ-weighted average for the plots
eezAverage = pd.DataFrame(plotsData.reset_index()).merge(
    eez, left_on="Country", right_on="Country", how="left"
)
# weight each country's value by its EEZ area (Area_km2)
eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2
eezAverage = (
    eezAverage.groupby(["Year", "indicator"])
    .sum(numeric_only=True)
    .drop("value", axis=1)
)
# weighted mean = sum(value * area) / sum(area)
eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
Code
InteractiveShell.ast_node_interactivity = "last_expr"  # last_expr

Box plots

Code
# define function to plot boxplots

def plotBox(
    df,
    xlegend,
    ylegend,
    maxShift,
    minShift,
    xaxis_title="",
    yaxis_title="",
    colorGroup=None,
):
    """Draw one grey box plot per indicator, annotated with extremes.

    For every unique value of ``df.indicator`` the function overlays a grey
    box, annotates the countries holding the max/min value, marks the
    EEZ-weighted average (read from the notebook-global ``eezAverage``) as a
    black dot, and writes the figure to ../output/figs/boxPlot<xaxis_title>.

    Parameters
    ----------
    df : DataFrame with columns "indicator", "value" and "Country".
    xlegend, ylegend : legend position (paper coordinates).
    maxShift, minShift : vertical pixel offsets for the max/min annotations.
    xaxis_title, yaxis_title : axis titles; xaxis_title is also used in the
        output file name. BUGFIX: these previously defaulted to the *type*
        ``str`` (a typo for a string default), which would have produced
        "boxPlot<class 'str'>" file names had they ever been omitted.
    colorGroup : unused; kept for backward compatibility with existing calls.
    """
    # start plots
    fig = px.box(
        df,
        x="indicator",
        y="value",
        boxmode="overlay",
        # color all grey
        color_discrete_sequence=["grey"],
    )
    # add Countries annotation in the max and min
    # create annotation list, x is the x-axis position of the annotation
    annoList = []

    maxCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        # get max value for indicator
        maxVal = np.max(df[df["indicator"] == s]["value"])
        # get countries with max value, if more than 4 countries, use *
        # NOTE: this local name shadows the module-level `countries` list
        countries = df.loc[
            (df["value"] == maxVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            maxCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annotation = dict(
            x=x,
            # y position is the max value
            y=maxVal,
            text=countries,
            yshift=maxShift,
            showarrow=False,
        )
        annoList.append(annotation)
        x += 1

    minCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        # get min value for indicator
        minVal = np.min(df[df["indicator"] == s]["value"])
        # get countries with min value, if more than 4 countries, use * and store in dictionary
        countries = df.loc[
            (df["value"] == minVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            minCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annotation = dict(
            x=x,
            # y position is the min value
            y=minVal,
            text=countries,
            yshift=minShift,
            showarrow=False,
        )
        annoList.append(annotation)
        x += 1

    # add EEZ-weighted average values (most recent year, notebook-global eezAverage)
    mostRecent = eezAverage.index.get_level_values("Year").max()
    indicatorAverage = eezAverage.loc[
        (mostRecent, df.indicator.unique()), :
    ].reset_index()
    for s in indicatorAverage.indicator.unique():
        # get EEZ-weighted average value for indicator
        averageVal = indicatorAverage[indicatorAverage["indicator"] == s][
            "averageEEZ"
        ].values[0]
        fig.add_scatter(
            x=[s],
            # y position is the average value
            y=[averageVal],
            type="scatter",
            mode="markers",
            legendgroup="EEZ-weighted average",
            name="EEZ-weighted average",
            marker=dict(color="black", size=6),
        )

    fig.update_layout(annotations=annoList)
    # fig.add_vline(x=2.5, line_width=3, line_color="black")

    # just to add the legend with one entry
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        type="scatter",
        mode="markers",
        legendgroup="EEZ-weighted average",
        name="EEZ-weighted average",
        marker=dict(color="black", size=6),
    )

    # change legend position, axis titles
    fig.update_layout(
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="White",
            bordercolor="black",
            borderwidth=1,
        ),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        plot_bgcolor="white",
        # yaxis_range=[-20,100]
    )
    fig.write_image("../output/figs/boxPlot{0}.png".format(xaxis_title), scale=2)
    fig.write_image("../output/figs/boxPlot{0}.pdf".format(xaxis_title))
    fig.show()
Code
# plot min, max, and EEZ-weighted average for targets
# only for most recent year
boxPlotData = plotsData[
    plotsData.index.get_level_values("Year").isin([mostRecent])
].reset_index()
# use country abbreviations so the annotations stay compact
boxPlotData.Country = boxPlotData.Country.map(country_to_abbrev)
boxPlotData.sort_values(by=["target"], inplace=True)

# rows at indicator level (indicator column holds a figure label)
indicatorsBox = boxPlotData[boxPlotData.indicator.isin(list(indicatorNames.values()))]

# plotBox(
#     df=indicatorsBox,
#     xaxis_title="Indicators",
#     yaxis_title="Percentage",
#     xlegend=0.85,
#     ylegend=0.2,
#     maxShift=30,
#     minShift=-20,
#     colorGroup="target",
# )

# rows at target level (target names such as "14.1 Pollution")
targetBox = boxPlotData[
    boxPlotData.indicator.isin(list(targetsDict.keys()))
    & boxPlotData.indicator.str.startswith("14")
]
plotBox(
    df=targetBox,
    xaxis_title="Targets",
    yaxis_title="Percentage",
    xlegend=0.42,
    ylegend=1.2,
    maxShift=30,
    minShift=-10,
    colorGroup="indicator",
)
Code
# Boxplots with subplots for each target
boxesLen = len(indicatorsBox.target.unique().tolist())
boxes = []

# Initialize the subplot figure
fig = make_subplots(
    rows=2,
    cols=int(boxesLen / 2),
    subplot_titles=indicatorsBox.target.unique().tolist(),
    shared_yaxes=True,
    horizontal_spacing=0.005,
    vertical_spacing=0.15,
    y_title="Percentage",
)

# subplot row for the i-th target (1-based): first five on row 1, rest on row 2
rowDict = {1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 2, 10: 2}

# Create a boxplot facet for each target
for i, target in enumerate(indicatorsBox.target.unique().tolist()):
    traceTemp = go.Box(
        y=indicatorsBox[indicatorsBox.target == target].value,
        x=indicatorsBox[indicatorsBox.target == target].indicator,
        name=target,
        marker=dict(color="grey"),
        showlegend=False,
    )
    boxes.append(traceTemp)

    # filter df to get EEZ-weighted average for each indicator within the target
    mostRecent = int(eezAverage.index.get_level_values("Year").max())
    indicatorAverage = eezAverage.loc[
        (mostRecent, indicatorsBox[indicatorsBox.target == target].indicator.unique()),
        :,
    ].reset_index()

    for s in indicatorAverage.indicator.unique():
        # get EEZ-weighted average value for each indicator
        averageVal = indicatorAverage[indicatorAverage["indicator"] == s][
            "averageEEZ"
        ].values[0]
        tempScatter = go.Scatter(
            x=[s],
            y=[averageVal],
            type="scatter",
            mode="markers",
            legendgroup="EEZ-weighted average",
            name="EEZ-weighted average",
            marker=dict(color="black", size=6),
            showlegend=False,
        )
        fig.add_trace(tempScatter, row=rowDict[i + 1], col=i % 5 + 1)

        # max and min values for the indicator within the target
        maxVal = np.max(
            indicatorsBox[
                (indicatorsBox.target == target) & (indicatorsBox.indicator == s)
            ]["value"]
        )
        minVal = np.min(
            indicatorsBox[
                (indicatorsBox.target == target) & (indicatorsBox.indicator == s)
            ]["value"]
        )

        # get countries with max and min values and create annotation
        max_countries = indicatorsBox.loc[
            (indicatorsBox["value"] == maxVal)
            & (indicatorsBox["indicator"] == s)
            & (indicatorsBox["target"] == target),
            "Country",
        ].values
        min_countries = indicatorsBox.loc[
            (indicatorsBox["value"] == minVal)
            & (indicatorsBox["indicator"] == s)
            & (indicatorsBox["target"] == target),
            "Country",
        ].values

        # abbreviate crowded annotations with "*"
        if len(max_countries) > 3:
            max_countries = "*"
        else:
            max_countries = ", ".join(max_countries)

        if len(min_countries) > 3:
            min_countries = "*"
        else:
            min_countries = ", ".join(min_countries)

        max_annotation = dict(
            x=s,
            y=maxVal,
            text=max_countries,
            yshift=15,
            showarrow=False,
        )
        min_annotation = dict(
            x=s,
            y=minVal,
            text=min_countries,
            yshift=-15,
            showarrow=False,
        )

        fig.add_annotation(max_annotation, row=rowDict[i + 1], col=i % 5 + 1)
        fig.add_annotation(min_annotation, row=rowDict[i + 1], col=i % 5 + 1)

    # the box trace itself is added after its annotations/averages
    fig.add_trace(traceTemp, row=rowDict[i + 1], col=i % 5 + 1)

# dummy trace for the legend
legend_entry = go.Scatter(
    x=[None],
    y=[None],
    mode='markers',
    marker=dict(color='black', size=6),
    name='EEZ-weighted average',
)
fig.add_trace(legend_entry)

fig.update_layout(
    showlegend=True,
    height=1000,
    width=900,
    title_x=0.5,
    legend = dict(
        x=0.4, y=1.07,
        font=dict(family="sans-serif", size=12, color="black"),
        bgcolor="White",
        bordercolor="black",
        borderwidth=1,
    ),
)

fig.update_xaxes(tickangle=45)
fig.update_annotations(font_size=12)
fig.write_image("../output/figs/boxPlotIndicators.png", scale=2)
fig.write_image("../output/figs/boxPlotIndicators.pdf")
fig.show()

Weak vs. Strong Sustainability

Code
# plot comparing strong and weak sustainability
weakVSstrong = finalData.copy().reset_index()
weakVSstrong
# relabel the latest year for plot titles and the loop below
weakVSstrong.loc[weakVSstrong["Year"] == 2022, "Year"] = "Most recent year"
for year in [2012, 2016, "Most recent year"]:
    # rank under weak sustainability (x) vs. strong sustainability (y),
    # with the Monte Carlo rank std as vertical error bars
    fig = px.scatter(
        weakVSstrong[weakVSstrong.Year.isin([year])],
        x="rankWeak",
        y="rankMean",
        text="Country",
        title=str(year),
        error_y=np.array(weakVSstrong[weakVSstrong.Year.isin([year])]["rankStd"]),
    )
    fig.update_traces(
        textposition="bottom right",
        marker=dict(color="black", size=0),
        error_y=dict(width=0),
    )
    # add 45 degree line
    fig.update_layout(
        shapes=[
            {
                "type": "line",
                "yref": "paper",
                "xref": "paper",
                "y0": 0,
                "y1": 1,
                "x0": 0,
                "x1": 1,
                "line": {"color": "grey", "width": 2},
                "layer": "below",
            }
        ]
    )
    # change axis titles, fix aspect ratio
    fig.update_layout(
        xaxis_title=r"$\text{Ranking with unlimited substitution possibilities} \quad \sigma= \infty$",
        yaxis_title=r"$\text{Ranking with limited substitution possibilities} \quad \sigma=\text{U(0,1)}$",
        font=dict(family="sans-serif"),
        autosize=False,
        width=800,
        height=800,
        # important for 45 degree line to show correctly
        yaxis=dict(range=[0, len(weakVSstrong["Country"].unique()) + 3]),
        xaxis=dict(range=[0, len(weakVSstrong["Country"].unique()) + 3]),
    )
    fig.write_image("../output/figs/weakVsStrong_{year}.pdf".format(year=year))
    fig.write_image("../output/figs/weakVsStrong_{year}.png".format(year=year), scale=2)
    fig.show()

Status vs. Progress

Code
# Combine most-recent status (2022 scores) with the per-country progress trend.
statusProgress = finalData.reset_index()
statusProgress = statusProgress[statusProgress.Year == 2022].merge(
    progressTarget.reset_index(), on=["Country"], how="outer"
)
statusProgress.sort_values(by=["Slope"], inplace=True)
Code
#  EEZ-weighted average for the scoreMean
scoreAvgEEZ = eezAverage.reset_index()
scoreAvgEEZ = scoreAvgEEZ[
    (scoreAvgEEZ.indicator == "scoreMean") & (scoreAvgEEZ.Year == 2022)
]["averageEEZ"].values[0]

#  EEZ-weighted average for the progress (trend slopes weighted by EEZ area)
eezAvgProg = pd.DataFrame(progressTarget.reset_index()).merge(
    eez, on="Country", how="left"
)
eezAvgProg = sum(eezAvgProg.Slope * eezAvgProg.Area_km2 / sum(eezAvgProg.Area_km2))
Code
# Scatter of progress (trend slope) vs. status (most recent composite score).
fig = px.scatter(
    statusProgress,
    x="Slope",
    y="scoreMean",
    hover_name="Country",
    text="Country",
    color_discrete_sequence=["black"],
)
fig.update_traces(textposition="top center")
# dashed reference lines at the EEZ-weighted averages split the four quadrants
fig.add_vline(x=eezAvgProg, line_width=0.5, line_color="black", line_dash="dash")
fig.add_hline(y=scoreAvgEEZ, line_width=0.5, line_color="black", line_dash="dash")


def improve_text_position(x):
    """Return one text position per element of *x*, cycling through a
    right/left/right pattern to reduce label overlap in scatter plots."""
    pattern = ("middle right", "middle left", "middle right")
    placements = []
    for idx in range(len(x)):
        placements.append(pattern[idx % 3])
    return placements


# alternate label placement to reduce overlap
fig.update_traces(
    textposition=improve_text_position(statusProgress["Slope"]),
)
# quadrant labels: (low progress, high status)
fig.add_annotation(
    dict(
        font=dict(color="black", size=15),
        x=-0.5,
        y=72,
        showarrow=False,
        text="Losing Momentum",
        # xanchor='left',
    )
)
# (low progress, low status)
fig.add_annotation(
    dict(
        font=dict(color="black", size=15),
        x=-0.5,
        y=36,
        showarrow=False,
        text="Falling Further Behind",
        # xanchor='left',
    )
)
# (high progress, low status)
fig.add_annotation(
    dict(
        font=dict(color="black", size=15),
        x=1.7,
        y=36,
        showarrow=False,
        text="Catching up",
        xanchor="right",
    )
)
# (high progress, high status)
fig.add_annotation(
    dict(
        font=dict(color="black", size=15),
        x=1.7,
        y=72,
        showarrow=False,
        text="Moving Ahead",
        xanchor="right",
    )
)

fig.update_layout(
    title_text="Progress vs. Status in SDG 14",
    title_x=0.5,
    xaxis_title="Progress",
    yaxis_title="Status (most recent)",
    # paper_bgcolor='white',
    plot_bgcolor="white",
    xaxis=dict(range=[-0.7, 1.7]),
    font=dict(
        # family="Courier New, monospace",
        size=14,  # Set the font size here
        # color="RebeccaPurple"
    ),
    showlegend=True,
    height=800,
    width=1000,
)
fig.write_image("../output/figs/progressVsStatus.pdf")
fig.write_image("../output/figs/progressVsStatus.png", scale=2)
fig.show()
Code
# Load the Excel file into a DataFrame
# BUGFIX: `plt` and `adjust_text` are used below but were never imported in
# this notebook; import them here so the cell is self-contained.
import matplotlib.pyplot as plt
from adjustText import adjust_text  # third-party package `adjustText`

# NOTE(review): hard-coded absolute Windows path; consider moving the file
# under ../data like the other inputs for portability.
file_path = r'C:\Users\chris\Documents\Ranks.xlsx'
df = pd.read_excel(file_path)

# Filter the DataFrame to only include data for the years 2012 and 2022
df_2012 = df[df['Year'] == 2012]
df_2022 = df[df['Year'] == 2022]

# Merge the dataframes based on Country name
merged_df = pd.merge(df_2012, df_2022, on='Country', suffixes=('_2012', '_2022'))

# Calculate the change in ranks for rankMean and rankWeak
# (positive change = the country fell in the ranking)
merged_df['Change_rankMean'] = merged_df['rankMean_2022'] - merged_df['rankMean_2012']
merged_df['Change_rankWeak'] = merged_df['rankWeak_2022'] - merged_df['rankWeak_2012']

print(merged_df)
# Create the scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(merged_df['Change_rankWeak'], merged_df['Change_rankMean'], color='blue')

# Add a dotted line at the zero points
plt.axvline(x=0, color='gray', linestyle='solid', linewidth=0.5)
plt.axhline(y=0, color='gray', linestyle='solid', linewidth=0.5)

# Add a 45-degree line from bottom left to top right
plt.plot([-15, 15], [-15, 15], color='gray', linestyle='dotted')

# Add country names to the points
texts = []
for i, row in merged_df.iterrows():
    texts.append(plt.text(row['Change_rankWeak'], row['Change_rankMean'], row['Country'], ha='center', va='center', fontsize=8))

# Adjust the positions of labels to prevent overlaps
adjust_text(texts, arrowprops=dict(arrowstyle="-", color='black'), autoalign='xy', force_text=0.1, lim=100)

# Set plot labels and title
plt.xlabel('Change in EU Ocean State Scores with unlimited substitution possibilities')
plt.ylabel('Change in EU Ocean State Scores with limited substitution possibilities')
plt.title('Ocean Development from 2012 to most recent under two Concepts of Sustainability')

# Display the plot
plt.tight_layout()
plt.show()

Results vs OHI & BHI

Code
# Compare scores to OHI and BHI scores
# BHI data https://github.com/OHI-Science/bhi/blob/master/baltic/scores.csv
# BHI regions https://github.com/OHI-Science/bhi/blob/6a80931c0bfbaa05c421abf2fabd2bad03943f47/baltic2019draft/layers/rgns_complete.csv

bhi = pd.read_csv("../data/scoresBHIdraft.csv")
# keep only the "status" dimension of the BHI scores
bhi = bhi[bhi.dimension == "status"]
regionsID = pd.read_csv("../data/rgnsBHI_complete.csv")
bhi = bhi.merge(regionsID, on="region_id")
# average BHI status per EEZ (country)
bhiStatus = bhi.groupby(["eez"]).mean(numeric_only=True).reset_index()
ohi = pd.read_csv("../data/scoresOHI.csv")
# OHI: status dimension, 2022 scenario, assessed countries only
ohi = ohi[
    ohi["region_name"].isin(countries)
    & (ohi.dimension == "status")
    & (ohi.scenario == 2022)
]
ohiAvgStatus = (
    ohi.groupby(["region_name", "scenario"]).mean(numeric_only=True).reset_index()
)
# join our scores with the OHI averages (inner) and BHI status (left; BHI
# covers Baltic countries only, so non-Baltic rows get NaN)
ohiBhiCompare = (
    finalData.reset_index()[["Country", "Year", "scoreMean", "scoreStd", "scoreWeak"]]
    .merge(
        ohiAvgStatus, left_on=["Country", "Year"], right_on=["region_name", "scenario"]
    )
    .merge(bhiStatus, left_on="Country", right_on="eez", how="left")
)
ohiBhiCompare = ohiBhiCompare.rename(
    columns={"value": "OHI_Status", "score": "BHI_Status"}
)[["Country", "scoreMean", "scoreStd", "scoreWeak", "OHI_Status", "BHI_Status"]]

# correlation with OHI (all assessed countries) and with BHI (Baltic subset)
pearsonr(ohiBhiCompare.scoreMean, ohiBhiCompare.OHI_Status)
pearsonr(ohiBhiCompare.dropna().scoreMean, ohiBhiCompare.dropna().BHI_Status)
PearsonRResult(statistic=-0.1962758227908415, pvalue=0.6413253073648895)
Code
pearsonr(ohiBhiCompare.scoreMean, ohiBhiCompare.OHI_Status)
PearsonRResult(statistic=-0.13469174756194333, pvalue=0.6322276746563491)
Code
# Stack the comparison table to long format for grouped bar plotting.
ohiBhiStack = (
    ohiBhiCompare.set_index("Country")
    .stack()
    .reset_index()
    .rename(columns={"level_1": "assessment", 0: "Value"})
)

fig = go.Figure()
# strong-sustainability score with its Monte Carlo std as error bars
fig.add_trace(
    go.Bar(
        y=ohiBhiStack[ohiBhiStack.assessment.isin(["scoreMean"])]["Value"],
        x=ohiBhiStack[ohiBhiStack.assessment.isin(["scoreMean"])]["Country"],
        name="SDG 14 Strong",
        error_y=dict(
            type="data",
            array=ohiBhiStack[ohiBhiStack.assessment.isin(["scoreStd"])]["Value"],
            visible=True,
            width=0,
        ),
        marker=dict(color="white", line=dict(width=2, color="grey")),
    )
)
# weak-sustainability score (hatched white bars)
fig.add_trace(
    go.Bar(
        y=ohiBhiStack[ohiBhiStack.assessment.isin(["scoreWeak"])]["Value"],
        x=ohiBhiStack[ohiBhiStack.assessment.isin(["scoreWeak"])]["Country"],
        marker=dict(
            color="white",
            line=dict(width=2, color="grey"),
            pattern_shape=".",
            pattern_size=5,
        ),
        name="SDG 14 Weak",
    )
)
# OHI status (grey bars)
fig.add_trace(
    go.Bar(
        y=ohiBhiStack[ohiBhiStack.assessment.isin(["OHI_Status"])]["Value"],
        x=ohiBhiStack[ohiBhiStack.assessment.isin(["OHI_Status"])]["Country"],
        name="OHI Score",
        marker=dict(color="grey"),
        # marker_pattern_shape="x",
    )
)
# BHI status (black bars; Baltic countries only)
fig.add_trace(
    go.Bar(
        y=ohiBhiStack[ohiBhiStack.assessment.isin(["BHI_Status"])]["Value"],
        x=ohiBhiStack[ohiBhiStack.assessment.isin(["BHI_Status"])]["Country"],
        name="BHI Score",
        marker=dict(color="black"),
        # marker_pattern_shape=".",
    )
)
# fig.add_trace(go.Bar(
#     y = ohiBhiStack[ohiBhiStack.assessment.isin(["scoreMean"])]["scoreStd"],
#     x = ohiBhiStack[ohiBhiStack.assessment.isin(["scoreMean"])]["Country"],
#     # name='error1',
#     error_x = dict(array=[0.5]*3),
#     offset=[-0.6,-0.6,-0.6],
#     marker=dict(color='rgba(0,0,0,0)', line=dict(width=0)),
# ))
fig.update_layout(
    barmode="group",
    paper_bgcolor="white",
    plot_bgcolor="white",
    title="Comparison of SDG 14 and OHI/BHI Scores",
    xaxis_title="Country",
    yaxis_title="Score",
    title_x=0.5,
    autosize=False,
    width=1000,
    height=400,
)
fig.write_image("../output/figs/sdg14VsOhiBhi.pdf")
fig.write_image("../output/figs/sdg14VsOhiBhi.png", scale=2)
fig.show()

LaTeX table

Code
# Create table for manuscript
tableLatex = finalData.reset_index()
tableLatex = tableLatex[
    [
        "Year",
        "Country",
        "scoreMean",
        "scoreStd",
        "rankMean",
        "rankStd",
        "scoreWeak",
        "rankWeak",
    ]
].set_index(["Country", "Year"])
mostRecent = tableLatex.index.get_level_values("Year").max()
# long -> wide: columns become a (Year, Value) MultiIndex
tableLatex = tableLatex.stack().unstack([1, 2])
tableLatex = tableLatex.rename_axis(["Year", "Value"], axis=1)
# order the columns as they should appear in the manuscript table
tableLatex = tableLatex[
    [
        # score strong
        (2012, "scoreMean"),
        (2012, "scoreStd"),
        (2016, "scoreMean"),
        (2016, "scoreStd"),
        (mostRecent, "scoreMean"),
        (mostRecent, "scoreStd"),
        # rank strong
        (2012, "rankMean"),
        (2012, "rankStd"),
        (2016, "rankMean"),
        (2016, "rankStd"),
        (mostRecent, "rankMean"),
        (mostRecent, "rankStd"),
        # score weak
        (2012, "scoreWeak"),
        (2012, "rankWeak"),
        (2016, "scoreWeak"),
        (2016, "rankWeak"),
        (mostRecent, "scoreWeak"),
        (mostRecent, "rankWeak"),
        # rank weak
    ]
]
# header labels for the two sustainability concepts
# NOTE(review): conceptL is currently unused -- the MultiIndex assignment
# below is commented out
conceptL = ["Concept of Strong Sustainability ( σ ∼ U (0,1), N = 10,000)"] * 12 + [
    "Concept of Weak Sustainability (σ → ∞)"
] * 6

# add multiindex to tableLatex using conceptL list
# tableLatex.columns = pd.MultiIndex.from_arrays([conceptL, tableLatex.columns.levels[0],tableLatex.columns.levels[1] ],names=[["Concept", "Year", 'Value']])
tableLatex = tableLatex.sort_values(by=(2012, "rankMean"))
tableLatex = tableLatex.style.format(precision=2)
# tableLatex
# print(tableLatex.to_latex(siunitx=True, hrules=True))

Trash bin

2. Carbon emissions per capita

Several sources (see code)

Code
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)
# CO2, KG_HAB, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/

# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')
# co2 = co2[co2['geo'].isin(countryAbb)]
# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])
# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')

# mr = co2.columns[-1] # most recent year
# # co2[[2012, 2016, mr]]/1000

1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)

Several sources (see code)

2. Carbon emissions per capita

Several sources (see code)

Code
# # CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# # https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

# co2 = pd.read_csv("../data/env_air_gge.csv")
# co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
# co2 = co2[
#     co2["geo"].isin(countries)
#     & (co2.airpol == "CO2")
#     & (co2.src_crf == "TOTX4_MEMONIA")
# ]
# co2 = co2.pivot_table(
#     columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
# )

# mr = co2.columns[-1]  # most recent year
# co2pc = co2 / pop * 1000  ## tonnes co2 per capita
# co2pc.dropna(how="all", inplace=True)
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# co2pc = (
#     (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
#     / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)

# co2pc[[2012, 2016, mr]]

# missingCountries(co2pc)
Code
# # Greenhouse gas emissions under the Effort Sharing Decision (ESD), Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency
# # https://www.eea.europa.eu/data-and-maps/data/esd-4

# ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)
# ghgESD = ghgESD[ghgESD["Geographic entity"].isin(countries)]
# ghgESD = ghgESD.set_index("Geographic entity")
# ghgESD = ghgESD.dropna(axis=1, how="all")
# negativeValues(ghgESD)
# mr = ghgESD.columns[-1]  # most recent year
# ghgESD = ghgESD * 1_000_000
Code
# # Allocation for 2020 target
# # https://ec.europa.eu/clima/ets/esdAllocations.do?languageCode=en&esdRegistry=-1&esdYear=&search=Search&currentSortSettings=
# allocation2020 = pd.read_xml(
#     "../data/esdAllocations2020.xml", xpath=".//ESDAllocations/ESDAllocationInfo"
# )
# allocation2020["Country"] = (
#     allocation2020["ESDMemberState"]
#     .map(abbrev_to_country)
#     .fillna(allocation2020["ESDMemberState"])
# )
# allocation2020 = allocation2020[allocation2020["Country"].isin(countries)]
# allocation2020 = allocation2020.pivot_table(
#     columns="ESDYear", index="Country", values="Allocation", aggfunc="mean"
# )

# # Allocation for 2030 target
# # https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32020D2126
# allocation2030 = pd.read_html("../data/esdAllocations2030.html")[13][1:]
# allocation2030.columns = allocation2030.iloc[0]
# allocation2030 = allocation2030[1:]
# allocation2030.loc[
#     allocation2030["Member State"].str.contains("Netherlands"), "Member State"
# ] = "Netherlands"
# allocation2030 = allocation2030[allocation2030["Member State"].isin(countries)]
# allocation2030.set_index("Member State", inplace=True)
# for col in allocation2030.columns:
#     allocation2030[col] = allocation2030[col].apply(
#         lambda x: str(x).replace("\xa0", "")
#     )
# allocation2030 = allocation2030.astype(int)

# # Merge 2005 values with 2020 and 2030 allocations. Interpolate for years 2006-2012
# allocationESD = ghgESD[[2005]].merge(
#     allocation2020.merge(
#         allocation2030, left_index=True, right_index=True, how="outer"
#     ),
#     left_index=True,
#     right_index=True,
#     how="outer",
# )
# allocationESD.columns = allocationESD.columns.astype(int)
# allocationESD[list(range(2006, 2013))] = np.nan  # create empty columns for 2006-2012
# allocationESD = allocationESD[list(range(2005, 2031))]
# allocationESD.interpolate(axis=1, inplace=True)

# # Calculate score for ESD
# scoreESD = 100 - (100 * (ghgESD - allocationESD) / allocationESD)
# scoreESD[scoreESD > 100] = 100
# scoreESD = scoreESD.ffill(axis=1)  # fill empty values with last available year
# scoreESD[[2013, 2016, 2021]]
# missingCountries(scoreESD)

Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

Code
# # Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

# scoreESD2020 = 100 - 100 * (
#     ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)
# ).divide(allocationESD[2020], axis=0)
# scoreESD2030 = 100 - 100 * (
#     ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)
# ).divide(allocationESD[2030], axis=0)
# scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)
# scoreESD1[scoreESD1 > 100] = 100
# # scoreESD1[[2012, 2018, 2021]]
Code
# # Effort sharing regulation
# # Get the targets for 2020 and 2030 in percentage
# # Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels
# # There are targets for 2020 and for 2030
# # https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas
# # Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)

# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)
# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})
# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]
# limitESR.set_index('Country', inplace=True)
# limitESR = limitESR[limitESR.index.isin(countries)]
# # UK is not in the dataset; we need to add it from the official journal
# limitESR
# missingCountries(limitESR)

1. Fishing Subsidies relative to Landings

Several sources

Code
# # Landing data for Fishing Subsidies

# # load oecd abbreviation list
# with open("../data/oecdAbb.csv") as csv_file:
#     reader = csv.reader(csv_file)
#     oecdAbb = dict(reader)

# # Landings in USD
# # https://data.oecd.org/fish/fish-landings.htm

# landing = pd.read_csv("../data/fishLandingsOECD.csv")
# landing["LOCATION"] = landing["LOCATION"].map(oecdAbb).fillna(landing["LOCATION"])
# landing = landing[
#     landing["LOCATION"].isin(countries)
#     & (landing["MEASURE"] == "USD")
#     & (landing["SUBJECT"] == "TOT")
# ]
# # landing.pivot_table(columns='TIME', index='LOCATION', values='Value', aggfunc='mean')
# # landing = landing[list(range(2010, 2021))]
# landing.rename(
#     columns={"LOCATION": "Country", "TIME": "Year", "Value": "Landing"}, inplace=True
# )
# landing.set_index(["Country", "Year"], inplace=True)


# # merge subsidies-landings and calculate score
# fseLanding = pd.merge(
#     fseRisk,
#     landing,
#     how="left",
#     left_on=["Country", "Year"],
#     right_on=["Country", "Year"],
# )
# fseLanding["fseLanding"] = fseLanding.Value / fseLanding.Landing
# fseLanding = fseLanding.pivot_table(
#     columns="Year", index="Country", values="fseLanding", aggfunc="mean"
# )
# mr = fseLanding.columns[-1]  # most recent year
# # Poland is an outlier
# # fseLanding = fseLanding[~fseLanding.index.str.contains("Poland")]
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best

# fseScore = (
#     (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr])
#     / (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)
# # fseScore = fseScore.ffill(axis=1)  # fill empty values with last available year
# fseScore[[2012, 2016, mr]]
# missingCountries(fseScore)
Code
# From 14.6, using Eurostat data for landing values.

# Even though the USD-EUR discrepancy does not affect the ratio we calculate,
# we get today's exchange rate to convert landing values to USD and have a consistent unit
# Solution source: https://stackoverflow.com/a/17250702/14534411


# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)
# tree = ET.parse(r.raw)
# root = tree.getroot()
# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}
# currency = 'USD'
# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)
# eurTOusd = float(match.attrib['rate'])

# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/

# landing = pd.read_csv('../data/fish_ld_main.csv')
# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)
# landing = landing[landing.Country.isin(countries)]
# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]
# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd
# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')
Code
# Alternative dataset for fisheries subsidies

# https://www.sciencedirect.com/science/article/abs/pii/S0308597X16000026#bib4


# sumaila2009 = pd.read_excel("../data/subsidies2016Sumaila.xlsx")
# sumaila2009["Countries"] = sumaila2009["Countries"].str.strip()
# sumaila2009 = sumaila2009[
#     sumaila2009["Countries"].isin(countries) | sumaila2009["Countries"].isin(countryAbb)
# ]
# sumaila2009.Countries = sumaila2009.Countries.map(abbrev_to_country).fillna(
#     sumaila2009.Countries
# )
# sumaila2009 = sumaila2009[["Countries", "ReEst_Subsidy2009", "SubType"]]
# sumaila2009 = sumaila2009.groupby(["Countries"]).sum(numeric_only=True)
# # sumaila2009

# missingCountries(sumaila2009)

# sumaila2018 = pd.read_csv("../data/subsidies2019Sumaila.csv")
# sumaila2018 = sumaila2018[["Country", "Constant 2018 USD", "Type"]]
# sumaila2018 = sumaila2018[sumaila2018.Country.isin(countries)]
# sumaila2018 = sumaila2018.groupby(["Country"]).sum(numeric_only=True)
# # sumaila2018

# missingCountries(sumaila2018)

OHI Habitat subgoal (Biodiversity)

Code
# # habitat subgoal
# # https://github.com/OHI-Science/ohi-global/tree/draft/yearly_results/global2022/Results/data
# years = [2012, 2016, 2022]
# habitatOHI = pd.DataFrame()
# for year in years:
#     tempDF = pd.read_csv("../data/habitatOHI/scores_eez_{0}.csv".format(year))
#     tempDF["year"] = year
#     habitatOHI = pd.concat([habitatOHI, tempDF])
# habitatOHI = habitatOHI[habitatOHI.region_name.isin(countries)][
#     ["region_name", "year", "HAB"]
# ]
# habitatOHI = habitatOHI.pivot_table(index="region_name", columns="year", values="HAB")
# habitatOHI
# missingCountries(habitatOHI)

EEZ as per eea.europa.eu

Code
# EEZ as per the EEA (France does not include outermost regions)
# https://www.eea.europa.eu/data-and-maps/daviz/n2k-cover-in-national-marine-waters/#tab-chart_2

# eezEEA = pd.read_csv("../data/n2k-cover-in-national-marine-waters.csv")
# eezEEA["eezEEA"] = (
#     eezEEA["Natura 2000 surface area:number"]
#     / eezEEA["Natura 2000 cover in national waters:number"]
#     * 100
# )
# eezEEA.rename(columns={"Country:text": "Country"}, inplace=True)
# eezEEA.set_index("Country", inplace=True)
# eezEEA = eezEEA[(eezEEA.index.isin(countries))]
# eezEEA.eezEEA.astype(int)

MPAs as defined by IUCN

Code
# iso3 = pd.read_csv("../data/iso3.csv")
# iso3 = iso3[iso3.name.isin(countries + outermost)]
# iso3Countries = list(iso3.iso3)
# dictISO3 = iso3.set_index("iso3").to_dict()["name"]

# # Protected Planet WDPA data
# # https://www.protectedplanet.net/en/thematic-areas/wdpa?tab=WDPA
# # Metadata https://wdpa.s3-eu-west-1.amazonaws.com/WDPA_Manual/English/WDPA_WDOECM_Manual_1_6.pdf
# wdpa = pd.read_csv("../data/WDPA_Jul2023_Public_csv/WDPA_Jul2023_Public_csv.csv")
Code
# wdpaDF = wdpa[
#     (wdpa.MARINE.isin([1, 2]))
#     & (wdpa.PA_DEF == 1)
#     & (wdpa.ISO3.isin(iso3Countries + ["BLM;GLP;MAF;MTQ"]))
#     & (wdpa.STATUS.isin(["Designated"]))
#     # & (wdpa.DESIG_ENG.str.contains('Directive')) # Check for Natura 2000 sites
# ]
# mpaProtected = pd.DataFrame()
# for year in [2012, 2016, 2022]:
#     tempDF = (
#         wdpaDF[wdpaDF.STATUS_YR <= year].groupby("PARENT_ISO3").sum(numeric_only=True)
#     )
#     tempDF["year"] = year
#     mpaProtected = pd.concat([mpaProtected, tempDF])
# mpaProtected.index = mpaProtected.index.map(dictISO3)
# mpaProtected = mpaProtected.merge(eez, left_index=True, right_index=True)
# mpaProtected["mpaEEZ"] = mpaProtected.REP_M_AREA / mpaProtected.Area_km2 * 100
# mpaProtected["Score"] = 100 * (mpaProtected.mpaEEZ) / 30
# mpaProtected.loc[mpaProtected["Score"] > 100, "Score"] = 100
# mpaProtected = mpaProtected.pivot(columns="year", values="Score")
# mpaProtected
# missingCountries(mpaProtected)
Code
# # Old MPA data (similar results to Natura2k, even though they use the Protected Planet data)
# # Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
# # https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS

# mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)
# mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")
# mpa = mpa.dropna(axis=1, how="all")
# mpa = mpa.drop(["Country Code", "Indicator Name", "Indicator Code"], axis=1)
# # display all rows
# negativeValues(mpa)
# mpa = (mpa / 0.3).round(2)  # dis-ref with target 30%
# mpa[mpa > 100] = 100
# mpa.sort_index(inplace=True)
# mpa[["2016", "2021"]]
# missingCountries(mpa)

Measures under the Marine Strategy Framework Directive (DROPPED)

Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN
# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393

# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF
# But the assessment is much shorter. They refer the reader to a JRC report:
# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363
# That report assesses all the descriptors, but it cannot be compared to the previous assessment.
# Moreover, the source code and data are not available.

# Overall, it is hard to construct an indicator for the measures that Member States take against the pressure indicators.
# Countries report different measures and data is poor.